From 6a5ed9e2e9d158c17e4e6d678cda06efdb2fc6be Mon Sep 17 00:00:00 2001
From: Jesse Natalie
Date: Mon, 22 May 2023 09:56:27 -0700
Subject: [PATCH] microsoft/compiler: Support load_ubo_vec4

Add support for 16-bit UBO loads, delete handling of byte-addressed UBO
loads (which I think was never used anyway) and add handling for the
component const index to optimize out unneeded extractResults.

Part-of:
---
 src/microsoft/compiler/dxil_module.c | 12 ++++--
 src/microsoft/compiler/nir_to_dxil.c | 55 ++++++----------------------
 2 files changed, 20 insertions(+), 47 deletions(-)

diff --git a/src/microsoft/compiler/dxil_module.c b/src/microsoft/compiler/dxil_module.c
index 8b13c04c24a..be309828eb8 100644
--- a/src/microsoft/compiler/dxil_module.c
+++ b/src/microsoft/compiler/dxil_module.c
@@ -705,12 +705,12 @@ const struct dxil_type *
 dxil_module_get_cbuf_ret_type(struct dxil_module *mod, enum overload_type overload)
 {
    const struct dxil_type *overload_type = dxil_get_overload_type(mod, overload);
-   const struct dxil_type *fields[4] = { overload_type, overload_type, overload_type, overload_type };
+   const struct dxil_type *fields[8] = { overload_type, overload_type, overload_type, overload_type,
+                                         overload_type, overload_type, overload_type, overload_type };
    unsigned num_fields;
 
    char name[64];
-   snprintf(name, sizeof(name), "dx.types.CBufRet.%s", dxil_overload_suffix(overload));
-
+   const char *additional = "";
    switch (overload) {
    case DXIL_I32:
    case DXIL_F32:
@@ -720,9 +720,15 @@ dxil_module_get_cbuf_ret_type(struct dxil_module *mod, enum overload_type overlo
    case DXIL_F64:
       num_fields = 2;
       break;
+   case DXIL_I16:
+   case DXIL_F16:
+      num_fields = 8;
+      additional = ".8";
+      break;
    default:
       unreachable("unexpected overload type");
    }
+   snprintf(name, sizeof(name), "dx.types.CBufRet.%s%s", dxil_overload_suffix(overload), additional);
 
    return dxil_module_get_struct_type(mod, name, fields, num_fields);
 }
diff --git a/src/microsoft/compiler/nir_to_dxil.c b/src/microsoft/compiler/nir_to_dxil.c
index 204e40345de..d585d29122b 100644
--- a/src/microsoft/compiler/nir_to_dxil.c
+++ b/src/microsoft/compiler/nir_to_dxil.c
@@ -126,6 +126,7 @@ nir_options = {
    .lower_pack_unorm_2x16 = true,
    .lower_pack_64_2x32_split = true,
    .lower_pack_32_2x16_split = true,
+   .lower_pack_64_4x16 = true,
    .lower_unpack_64_2x32_split = true,
    .lower_unpack_32_2x16_split = true,
    .lower_unpack_half_2x16 = true,
@@ -3557,47 +3558,9 @@ emit_store_ssbo_masked(struct ntd_context *ctx, nir_intrinsic_instr *intr)
 }
 
 static bool
-emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
+emit_load_ubo_vec4(struct ntd_context *ctx, nir_intrinsic_instr *intr)
 {
-   const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
-   if (!handle)
-      return false;
-
-   const struct dxil_value *offset;
-   nir_const_value *const_offset = nir_src_as_const_value(intr->src[1]);
-   if (const_offset) {
-      offset = dxil_module_get_int32_const(&ctx->mod, const_offset->i32 >> 4);
-   } else {
-      const struct dxil_value *offset_src = get_src(ctx, &intr->src[1], 0, nir_type_uint);
-      const struct dxil_value *c4 = dxil_module_get_int32_const(&ctx->mod, 4);
-      if (!offset_src || !c4)
-         return false;
-
-      offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ASHR, offset_src, c4, 0);
-   }
-
-   enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_float);
-   const struct dxil_value *agg = load_ubo(ctx, handle, offset, overload);
-
-   if (!agg)
-      return false;
-
-   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
-      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, agg, i);
-      store_dest(ctx, &intr->dest, i, retval);
-   }
-   if (nir_dest_bit_size(intr->dest) == 16)
-      ctx->mod.feats.native_low_precision = true;
-   return true;
-}
-
-static bool
-emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr)
-{
-   assert(nir_dest_num_components(intr->dest) <= 4);
-   assert(nir_dest_bit_size(intr->dest) == 32);
-
-   const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
+   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
 
    const struct dxil_value *offset =
       get_src(ctx, &intr->src[1], 0, nir_type_uint);
@@ -3609,9 +3572,11 @@ emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr)
    if (!agg)
      return false;
 
+   unsigned first_component = nir_intrinsic_has_component(intr) ?
+      nir_intrinsic_component(intr) : 0;
    for (unsigned i = 0; i < nir_dest_num_components(intr->dest); i++)
       store_dest(ctx, &intr->dest, i,
-                 dxil_emit_extractval(&ctx->mod, agg, i));
+                 dxil_emit_extractval(&ctx->mod, agg, i + first_component));
 
    if (nir_dest_bit_size(intr->dest) == 16)
       ctx->mod.feats.native_low_precision = true;
@@ -4878,10 +4843,9 @@ emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
       return emit_atomic_deref(ctx, intr);
    case nir_intrinsic_deref_atomic_swap:
       return emit_atomic_deref_swap(ctx, intr);
-   case nir_intrinsic_load_ubo:
-      return emit_load_ubo(ctx, intr);
    case nir_intrinsic_load_ubo_dxil:
-      return emit_load_ubo_dxil(ctx, intr);
+   case nir_intrinsic_load_ubo_vec4:
+      return emit_load_ubo_vec4(ctx, intr);
    case nir_intrinsic_load_primitive_id:
       return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
                                                DXIL_INTR_PRIMITIVE_ID, nir_type_int);
@@ -6527,6 +6491,9 @@ nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
    NIR_PASS_V(s, nir_lower_pack);
    NIR_PASS_V(s, dxil_nir_lower_system_values);
    NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out);
+
+   NIR_PASS_V(s, nir_lower_ubo_vec4);
+
    if (opts->shader_model_max < SHADER_MODEL_6_6) {
       /* In a later pass, load_helper_invocation will be lowered to sample mask based fallback,
        * so both load- and is- will be emulated eventually.
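
Note (illustration only, not part of the patch): the change relies on the vec4
addressing model of legacy cbuffer loads. nir_lower_ubo_vec4 rewrites
byte-addressed UBO loads into 16-byte row loads, and the intrinsic's component
index selects the first field to extract from the dx.types.CBufRet struct
(4 x 32-bit, 2 x 64-bit, or, with this patch, 8 x 16-bit fields per row).
A minimal standalone C sketch of that mapping follows; the struct and function
names are hypothetical and do not exist in Mesa.

#include <assert.h>
#include <stdio.h>

/* Hypothetical illustration of vec4 cbuffer addressing: a byte offset maps
 * to a 16-byte row (the index passed to cbufferLoadLegacy) plus the first
 * field to extract from the returned dx.types.CBufRet struct. */
struct ubo_vec4_addr {
   unsigned row;             /* vec4 row index for the legacy cbuffer load */
   unsigned first_component; /* first extractvalue index within the row */
};

static struct ubo_vec4_addr
ubo_vec4_addr_from_byte_offset(unsigned byte_offset, unsigned bit_size)
{
   /* 32-bit rows hold 4 fields, 16-bit rows hold 8, 64-bit rows hold 2. */
   unsigned field_bytes = bit_size / 8;
   assert(16 % field_bytes == 0 && byte_offset % field_bytes == 0);
   return (struct ubo_vec4_addr){
      .row = byte_offset / 16,
      .first_component = (byte_offset % 16) / field_bytes,
   };
}

int main(void)
{
   /* A 16-bit load at byte offset 36 resolves to row 2, field 2 of 8. */
   struct ubo_vec4_addr a = ubo_vec4_addr_from_byte_offset(36, 16);
   printf("row %u, first component %u\n", a.row, a.first_component);
   return 0;
}

That starting field is what nir_lower_ubo_vec4 records as the component index,
and it is why emit_load_ubo_vec4 can begin extracting at i + first_component
rather than always at field 0, avoiding the unneeded extractvalues the commit
message mentions.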