d3d12: Compile, bind, and cache compute PSOs

Reviewed-by: Sil Vilerino <sivileri@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14367>
This commit is contained in:
Jesse Natalie 2021-12-31 09:58:50 -08:00 committed by Marge Bot
parent e350d1ab09
commit 6d38a35afb
9 changed files with 292 additions and 75 deletions

View file

@ -182,7 +182,7 @@ d3d12_start_batch(struct d3d12_context *ctx, struct d3d12_batch *batch)
ctx->cmdlist->SetDescriptorHeaps(2, heaps);
ctx->cmdlist_dirty = ~0;
for (int i = 0; i < D3D12_GFX_SHADER_STAGES; ++i)
for (int i = 0; i < PIPE_SHADER_TYPES; ++i)
ctx->shader_dirty[i] = ~0;
if (!ctx->queries_disabled)

View file

@ -1047,62 +1047,17 @@ update_so_info(struct pipe_stream_output_info *so_info,
return so_outputs;
}
struct d3d12_shader_selector *
d3d12_create_shader(struct d3d12_context *ctx,
pipe_shader_type stage,
const struct pipe_shader_state *shader)
static struct d3d12_shader_selector *
d3d12_create_shader_impl(struct d3d12_context *ctx,
struct d3d12_shader_selector *sel,
struct nir_shader *nir,
struct d3d12_shader_selector *prev,
struct d3d12_shader_selector *next)
{
struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
sel->stage = stage;
struct nir_shader *nir = NULL;
if (shader->type == PIPE_SHADER_IR_NIR) {
nir = (nir_shader *)shader->ir.nir;
} else {
assert(shader->type == PIPE_SHADER_IR_TGSI);
nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
}
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
unsigned tex_scan_result = scan_texture_use(nir);
sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;
memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
update_so_info(&sel->so_info, nir->info.outputs_written);
assert(nir != NULL);
d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
uint64_t in_mask = nir->info.stage == MESA_SHADER_VERTEX ?
0 : VARYING_BIT_PRIMITIVE_ID;
uint64_t out_mask = nir->info.stage == MESA_SHADER_FRAGMENT ?
(1ull << FRAG_RESULT_STENCIL) :
VARYING_BIT_PRIMITIVE_ID;
d3d12_fix_io_uint_type(nir, in_mask, out_mask);
NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);
if (nir->info.stage != MESA_SHADER_VERTEX)
nir->info.inputs_read =
dxil_reassign_driver_locations(nir, nir_var_shader_in,
prev ? prev->current->nir->info.outputs_written : 0);
else
nir->info.inputs_read = dxil_sort_by_driver_location(nir, nir_var_shader_in);
if (nir->info.stage != MESA_SHADER_FRAGMENT) {
nir->info.outputs_written =
dxil_reassign_driver_locations(nir, nir_var_shader_out,
next ? next->current->nir->info.inputs_read : 0);
} else {
NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
dxil_sort_ps_outputs(nir);
}
/* Integer cube maps are not supported in DirectX because sampling is not supported
* on integer textures and TextureLoad is not supported for cube maps, so we have to
* lower integer cube maps to be handled like 2D textures arrays*/
@ -1140,6 +1095,81 @@ d3d12_create_shader(struct d3d12_context *ctx,
return sel;
}
struct d3d12_shader_selector *
d3d12_create_shader(struct d3d12_context *ctx,
pipe_shader_type stage,
const struct pipe_shader_state *shader)
{
struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
sel->stage = stage;
struct nir_shader *nir = NULL;
if (shader->type == PIPE_SHADER_IR_NIR) {
nir = (nir_shader *)shader->ir.nir;
} else {
assert(shader->type == PIPE_SHADER_IR_TGSI);
nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
}
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
update_so_info(&sel->so_info, nir->info.outputs_written);
assert(nir != NULL);
d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
uint64_t in_mask = nir->info.stage == MESA_SHADER_VERTEX ?
0 : VARYING_BIT_PRIMITIVE_ID;
uint64_t out_mask = nir->info.stage == MESA_SHADER_FRAGMENT ?
(1ull << FRAG_RESULT_STENCIL) :
VARYING_BIT_PRIMITIVE_ID;
d3d12_fix_io_uint_type(nir, in_mask, out_mask);
NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);
if (nir->info.stage != MESA_SHADER_VERTEX)
nir->info.inputs_read =
dxil_reassign_driver_locations(nir, nir_var_shader_in,
prev ? prev->current->nir->info.outputs_written : 0);
else
nir->info.inputs_read = dxil_sort_by_driver_location(nir, nir_var_shader_in);
if (nir->info.stage != MESA_SHADER_FRAGMENT) {
nir->info.outputs_written =
dxil_reassign_driver_locations(nir, nir_var_shader_out,
next ? next->current->nir->info.inputs_read : 0);
} else {
NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
dxil_sort_ps_outputs(nir);
}
return d3d12_create_shader_impl(ctx, sel, nir, prev, next);
}
/**
 * Create a shader selector for a compute shader.
 *
 * The program is taken as NIR directly, or translated from TGSI. Compute
 * has no neighbouring pipeline stages, so no previous/next selector is
 * handed to d3d12_create_shader_impl().
 *
 * \param ctx     context the shader is created for
 * \param shader  gallium compute state carrying the program and its IR type
 * \return whatever d3d12_create_shader_impl() returns for the new selector
 */
struct d3d12_shader_selector *
d3d12_create_compute_shader(struct d3d12_context *ctx,
                            const struct pipe_compute_state *shader)
{
   struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
   sel->stage = PIPE_SHADER_COMPUTE;

   struct nir_shader *nir = NULL;
   if (shader->ir_type == PIPE_SHADER_IR_NIR) {
      nir = (nir_shader *)shader->prog;
   } else {
      assert(shader->ir_type == PIPE_SHADER_IR_TGSI);
      nir = tgsi_to_nir(shader->prog, ctx->base.screen, false);
   }

   /* Same sanity check as the graphics path, placed before first use. */
   assert(nir != NULL);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   return d3d12_create_shader_impl(ctx, sel, nir, nullptr, nullptr);
}
void
d3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
{

View file

@ -197,6 +197,10 @@ d3d12_create_shader(struct d3d12_context *ctx,
enum pipe_shader_type stage,
const struct pipe_shader_state *shader);
/* Create a shader selector for a compute shader supplied as NIR or TGSI. */
struct d3d12_shader_selector *
d3d12_create_compute_shader(struct d3d12_context *ctx,
const struct pipe_compute_state *shader);
void
d3d12_shader_free(struct d3d12_shader_selector *shader);

View file

@ -77,6 +77,7 @@ d3d12_context_destroy(struct pipe_context *pctx)
slab_destroy_child(&ctx->transfer_pool_unsync);
d3d12_gs_variant_cache_destroy(ctx);
d3d12_gfx_pipeline_state_cache_destroy(ctx);
d3d12_compute_pipeline_state_cache_destroy(ctx);
d3d12_root_signature_cache_destroy(ctx);
u_suballocator_destroy(&ctx->query_allocator);
@ -1128,6 +1129,39 @@ d3d12_delete_gs_state(struct pipe_context *pctx, void *gs)
(struct d3d12_shader_selector *) gs);
}
/* pipe_context::create_compute_state hook: forwards to
 * d3d12_create_compute_shader() and returns the resulting selector as the
 * opaque CSO handle. */
static void *
d3d12_create_compute_state(struct pipe_context *pctx,
const struct pipe_compute_state *shader)
{
return d3d12_create_compute_shader(d3d12_context(pctx), shader);
}
/* pipe_context::bind_compute_state hook: records the given selector (which
 * is stored as-is, without a NULL check) as the context's current compute
 * state. No dirty flags are touched here. */
static void
d3d12_bind_compute_state(struct pipe_context *pctx, void *css)
{
d3d12_context(pctx)->compute_state = (struct d3d12_shader_selector *)css;
}
static void
d3d12_delete_compute_state(struct pipe_context *pctx, void *cs)
{
struct d3d12_context *ctx = d3d12_context(pctx);
struct d3d12_shader_selector *shader = (struct d3d12_shader_selector *)cs;
d3d12_compute_pipeline_state_cache_invalidate_shader(ctx, shader);
/* Make sure the pipeline state no longer reference the deleted shader */
struct d3d12_shader *iter = shader->first;
while (iter) {
if (ctx->compute_pipeline_state.stage == iter) {
ctx->compute_pipeline_state.stage = NULL;
break;
}
iter = iter->next_variant;
}
d3d12_shader_free(shader);
}
static bool
d3d12_init_polygon_stipple(struct pipe_context *pctx)
{
@ -2163,6 +2197,10 @@ d3d12_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
ctx->base.bind_gs_state = d3d12_bind_gs_state;
ctx->base.delete_gs_state = d3d12_delete_gs_state;
ctx->base.create_compute_state = d3d12_create_compute_state;
ctx->base.bind_compute_state = d3d12_bind_compute_state;
ctx->base.delete_compute_state = d3d12_delete_compute_state;
ctx->base.set_polygon_stipple = d3d12_set_polygon_stipple;
ctx->base.set_vertex_buffers = d3d12_set_vertex_buffers;
ctx->base.set_viewport_states = d3d12_set_viewport_states;
@ -2224,6 +2262,7 @@ d3d12_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
}
d3d12_gfx_pipeline_state_cache_init(ctx);
d3d12_compute_pipeline_state_cache_init(ctx);
d3d12_root_signature_cache_init(ctx);
d3d12_gs_variant_cache_init(ctx);

View file

@ -162,6 +162,7 @@ struct d3d12_context {
struct u_suballocator query_allocator;
struct u_suballocator so_allocator;
struct hash_table *pso_cache;
struct hash_table *compute_pso_cache;
struct hash_table *root_signature_cache;
struct hash_table *gs_variant_cache;
@ -213,12 +214,15 @@ struct d3d12_context {
unsigned fake_so_buffer_factor;
struct d3d12_shader_selector *gfx_stages[D3D12_GFX_SHADER_STAGES];
struct d3d12_shader_selector *compute_state;
struct d3d12_gfx_pipeline_state gfx_pipeline_state;
unsigned shader_dirty[D3D12_GFX_SHADER_STAGES];
struct d3d12_compute_pipeline_state compute_pipeline_state;
unsigned shader_dirty[PIPE_SHADER_TYPES];
unsigned state_dirty;
unsigned cmdlist_dirty;
ID3D12PipelineState *current_pso;
ID3D12PipelineState *current_gfx_pso;
ID3D12PipelineState *current_compute_pso;
bool reverse_depth_range;
ID3D12Fence *cmdqueue_fence;

View file

@ -761,9 +761,9 @@ d3d12_draw_vbo(struct pipe_context *pctx,
}
}
if (!ctx->current_pso || ctx->state_dirty & D3D12_DIRTY_PSO) {
ctx->current_pso = d3d12_get_gfx_pipeline_state(ctx);
assert(ctx->current_pso);
if (!ctx->current_gfx_pso || ctx->state_dirty & D3D12_DIRTY_PSO) {
ctx->current_gfx_pso = d3d12_get_gfx_pipeline_state(ctx);
assert(ctx->current_gfx_pso);
}
ctx->cmdlist_dirty |= ctx->state_dirty;
@ -778,9 +778,9 @@ d3d12_draw_vbo(struct pipe_context *pctx,
}
if (ctx->cmdlist_dirty & D3D12_DIRTY_PSO) {
assert(ctx->current_pso);
d3d12_batch_reference_object(batch, ctx->current_pso);
ctx->cmdlist->SetPipelineState(ctx->current_pso);
assert(ctx->current_gfx_pso);
d3d12_batch_reference_object(batch, ctx->current_gfx_pso);
ctx->cmdlist->SetPipelineState(ctx->current_gfx_pso);
}
D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES];

View file

@ -33,11 +33,16 @@
#include <dxguids/dxguids.h>
struct d3d12_pso_entry {
struct d3d12_gfx_pso_entry {
struct d3d12_gfx_pipeline_state key;
ID3D12PipelineState *pso;
};
/* Value stored in ctx->compute_pso_cache: the PSO together with a private
 * copy of the state it was created from. */
struct d3d12_compute_pso_entry {
/* Copy of the compute pipeline state; the hash table's key pointer refers
 * to this member, so it lives exactly as long as the entry. */
struct d3d12_compute_pipeline_state key;
/* PSO created for `key`; released when the entry is deleted. */
ID3D12PipelineState *pso;
};
static const char *
get_semantic_name(int slot, unsigned *index)
{
@ -295,7 +300,7 @@ d3d12_get_gfx_pipeline_state(struct d3d12_context *ctx)
struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ctx->pso_cache, hash,
&ctx->gfx_pipeline_state);
if (!entry) {
struct d3d12_pso_entry *data = (struct d3d12_pso_entry *)MALLOC(sizeof(struct d3d12_pso_entry));
struct d3d12_gfx_pso_entry *data = (struct d3d12_gfx_pso_entry *)MALLOC(sizeof(struct d3d12_gfx_pso_entry));
if (!data)
return NULL;
@ -310,7 +315,7 @@ d3d12_get_gfx_pipeline_state(struct d3d12_context *ctx)
assert(entry);
}
return ((struct d3d12_pso_entry *)(entry->data))->pso;
return ((struct d3d12_gfx_pso_entry *)(entry->data))->pso;
}
void
@ -320,28 +325,28 @@ d3d12_gfx_pipeline_state_cache_init(struct d3d12_context *ctx)
}
static void
delete_entry(struct hash_entry *entry)
delete_gfx_entry(struct hash_entry *entry)
{
struct d3d12_pso_entry *data = (struct d3d12_pso_entry *)entry->data;
struct d3d12_gfx_pso_entry *data = (struct d3d12_gfx_pso_entry *)entry->data;
data->pso->Release();
FREE(data);
}
static void
remove_entry(struct d3d12_context *ctx, struct hash_entry *entry)
remove_gfx_entry(struct d3d12_context *ctx, struct hash_entry *entry)
{
struct d3d12_pso_entry *data = (struct d3d12_pso_entry *)entry->data;
struct d3d12_gfx_pso_entry *data = (struct d3d12_gfx_pso_entry *)entry->data;
if (ctx->current_pso == data->pso)
ctx->current_pso = NULL;
if (ctx->current_gfx_pso == data->pso)
ctx->current_gfx_pso = NULL;
_mesa_hash_table_remove(ctx->pso_cache, entry);
delete_entry(entry);
delete_gfx_entry(entry);
}
void
d3d12_gfx_pipeline_state_cache_destroy(struct d3d12_context *ctx)
{
_mesa_hash_table_destroy(ctx->pso_cache, delete_entry);
_mesa_hash_table_destroy(ctx->pso_cache, delete_gfx_entry);
}
void
@ -350,7 +355,7 @@ d3d12_gfx_pipeline_state_cache_invalidate(struct d3d12_context *ctx, const void
hash_table_foreach(ctx->pso_cache, entry) {
const struct d3d12_gfx_pipeline_state *key = (struct d3d12_gfx_pipeline_state *)entry->key;
if (key->blend == state || key->zsa == state || key->rast == state)
remove_entry(ctx, entry);
remove_gfx_entry(ctx, entry);
}
}
@ -365,7 +370,123 @@ d3d12_gfx_pipeline_state_cache_invalidate_shader(struct d3d12_context *ctx,
hash_table_foreach(ctx->pso_cache, entry) {
const struct d3d12_gfx_pipeline_state *key = (struct d3d12_gfx_pipeline_state *)entry->key;
if (key->stages[stage] == shader)
remove_entry(ctx, entry);
remove_gfx_entry(ctx, entry);
}
shader = shader->next_variant;
}
}
/**
 * Build a new D3D12 compute PSO from the context's current compute
 * pipeline state (root signature + compute shader variant).
 *
 * \return the newly created PSO, or NULL if the device rejected it
 *         (a debug message is printed in that case)
 */
static ID3D12PipelineState *
create_compute_pipeline_state(struct d3d12_context *ctx)
{
   const struct d3d12_compute_pipeline_state *key = &ctx->compute_pipeline_state;

   D3D12_COMPUTE_PIPELINE_STATE_DESC desc = { 0 };
   desc.pRootSignature = key->root_signature;

   /* The bytecode is only filled in when a shader variant is present. */
   if (const auto cs = key->stage) {
      desc.CS.BytecodeLength = cs->bytecode_length;
      desc.CS.pShaderBytecode = cs->bytecode;
   }

   desc.NodeMask = 0;
   desc.CachedPSO.pCachedBlob = NULL;
   desc.CachedPSO.CachedBlobSizeInBytes = 0;
   desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;

   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
   ID3D12PipelineState *pso;
   const bool failed =
      FAILED(screen->dev->CreateComputePipelineState(&desc, IID_PPV_ARGS(&pso)));

   if (failed) {
      debug_printf("D3D12: CreateComputePipelineState failed!\n");
      return NULL;
   }

   return pso;
}
/* Hash a compute pipeline state key over its raw bytes. */
static uint32_t
hash_compute_pipeline_state(const void *key)
{
   const size_t key_size = sizeof(struct d3d12_compute_pipeline_state);

   return _mesa_hash_data(key, key_size);
}
/* Key-equality callback for the compute PSO cache: two keys match when
 * their raw bytes are identical. */
static bool
equals_compute_pipeline_state(const void *a, const void *b)
{
   const size_t key_size = sizeof(struct d3d12_compute_pipeline_state);
   const int diff = memcmp(a, b, key_size);

   return diff == 0;
}
/**
 * Return the compute PSO matching ctx->compute_pipeline_state.
 *
 * The PSO cache is consulted first; on a miss a new PSO is created and
 * inserted, keyed by a private copy of the current state.
 *
 * \return the cached or newly created PSO (owned by the cache), or NULL if
 *         the entry could not be allocated, PSO creation failed, or the
 *         entry could not be inserted into the cache
 */
ID3D12PipelineState *
d3d12_get_compute_pipeline_state(struct d3d12_context *ctx)
{
   uint32_t hash = hash_compute_pipeline_state(&ctx->compute_pipeline_state);
   struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ctx->compute_pso_cache, hash,
                                                                 &ctx->compute_pipeline_state);
   if (!entry) {
      struct d3d12_compute_pso_entry *data = (struct d3d12_compute_pso_entry *)MALLOC(sizeof(struct d3d12_compute_pso_entry));
      if (!data)
         return NULL;

      data->key = ctx->compute_pipeline_state;
      data->pso = create_compute_pipeline_state(ctx);
      if (!data->pso) {
         FREE(data);
         return NULL;
      }

      /* The key pointer refers to the copy inside the entry, so it stays
       * valid for as long as the entry is in the table. */
      entry = _mesa_hash_table_insert_pre_hashed(ctx->compute_pso_cache, hash, &data->key, data);
      if (!entry) {
         /* An assert alone would let release builds dereference NULL below
          * and leak both the PSO and the entry; clean up and report failure
          * the same way as the other error paths. */
         data->pso->Release();
         FREE(data);
         return NULL;
      }
   }

   return ((struct d3d12_compute_pso_entry *)(entry->data))->pso;
}
/* Create the compute PSO cache. No key-hash callback is installed; the
 * lookups and insertions in this file use the *_pre_hashed entry points with
 * a hash from hash_compute_pipeline_state(). Keys are compared with
 * equals_compute_pipeline_state(). */
void
d3d12_compute_pipeline_state_cache_init(struct d3d12_context *ctx)
{
ctx->compute_pso_cache = _mesa_hash_table_create(NULL, NULL, equals_compute_pipeline_state);
}
/* Release the PSO held by a compute cache entry and free the entry's
 * payload. The hash table slot itself is not touched. */
static void
delete_compute_entry(struct hash_entry *entry)
{
   auto *cached = static_cast<struct d3d12_compute_pso_entry *>(entry->data);

   cached->pso->Release();
   FREE(cached);
}
/* Evict one entry from the compute PSO cache. If its PSO is the one the
 * context currently tracks, that reference is cleared first. */
static void
remove_compute_entry(struct d3d12_context *ctx, struct hash_entry *entry)
{
   const auto *cached = static_cast<struct d3d12_compute_pso_entry *>(entry->data);
   const bool is_current = (cached->pso == ctx->current_compute_pso);

   if (is_current)
      ctx->current_compute_pso = NULL;

   /* Unlink from the table, then release the PSO and free the payload. */
   _mesa_hash_table_remove(ctx->compute_pso_cache, entry);
   delete_compute_entry(entry);
}
/* Destroy the compute PSO cache; delete_compute_entry() is passed as the
 * per-entry callback, which releases the PSO and frees the payload. */
void
d3d12_compute_pipeline_state_cache_destroy(struct d3d12_context *ctx)
{
_mesa_hash_table_destroy(ctx->compute_pso_cache, delete_compute_entry);
}
void
d3d12_compute_pipeline_state_cache_invalidate_shader(struct d3d12_context *ctx,
struct d3d12_shader_selector *selector)
{
struct d3d12_shader *shader = selector->first;
while (shader) {
hash_table_foreach(ctx->compute_pso_cache, entry) {
const struct d3d12_compute_pipeline_state *key = (struct d3d12_compute_pipeline_state *)entry->key;
if (key->stage == shader)
remove_compute_entry(ctx, entry);
}
shader = shader->next_variant;
}

View file

@ -81,6 +81,11 @@ struct d3d12_gfx_pipeline_state {
enum pipe_prim_type prim_type;
};
/* State from which a compute PSO is built. The compute PSO cache hashes and
 * compares this struct as raw bytes, so any member added later must not
 * introduce uninitialized padding. */
struct d3d12_compute_pipeline_state {
/* Root signature passed to the PSO description. */
ID3D12RootSignature *root_signature;
/* Compute shader variant providing the CS bytecode; may be NULL. */
struct d3d12_shader *stage;
};
DXGI_FORMAT
d3d12_rtv_format(struct d3d12_context *ctx, unsigned index);
@ -101,4 +106,17 @@ d3d12_gfx_pipeline_state_cache_invalidate_shader(struct d3d12_context *ctx,
enum pipe_shader_type stage,
struct d3d12_shader_selector *selector);
/* Create the per-context compute PSO cache. */
void
d3d12_compute_pipeline_state_cache_init(struct d3d12_context *ctx);
/* Destroy the compute PSO cache, releasing every PSO it holds. */
void
d3d12_compute_pipeline_state_cache_destroy(struct d3d12_context *ctx);
/* Look up the PSO for ctx->compute_pipeline_state, creating and caching it
 * on a miss. Returns NULL on allocation or PSO creation failure. */
ID3D12PipelineState *
d3d12_get_compute_pipeline_state(struct d3d12_context *ctx);
/* Evict every cached compute PSO whose key references a variant of the
 * given selector. */
void
d3d12_compute_pipeline_state_cache_invalidate_shader(struct d3d12_context *ctx,
struct d3d12_shader_selector *selector);
#endif

View file

@ -366,7 +366,8 @@ d3d12_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
if (shader == PIPE_SHADER_VERTEX ||
shader == PIPE_SHADER_FRAGMENT ||
shader == PIPE_SHADER_GEOMETRY)
shader == PIPE_SHADER_GEOMETRY ||
shader == PIPE_SHADER_COMPUTE)
return INT_MAX;
return 0;