microsoft/compiler: Support load_ubo_vec4
Add support for 16-bit UBO loads, delete handling of byte-addressed UBO loads (which I think was never used anyway), and add handling for the component const index to optimize out unneeded extractResults.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23173>
parent f960b37986
commit 6a5ed9e2e9

2 changed files with 20 additions and 47 deletions
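For context, a minimal sketch (not part of the commit; the names below are made up) of the addressing model this change moves to: load_ubo_vec4 addresses the constant buffer in 16-byte rows, a DXIL cbuffer load always returns a whole row, and a constant component index marks where the destination vector starts inside that row, so only the elements actually needed have to be extracted.

/* Illustrative model only, not Mesa code: a 16-byte constant-buffer row as
 * returned by a DXIL-style cbuffer load (four 32-bit elements; native 16-bit
 * loads see eight elements per row instead). */
#include <stdio.h>

struct cbuf_row { float f[4]; };

/* Hypothetical helper mirroring a vec4-addressed UBO load: "row" is the
 * 16-byte row index, "component" the starting element inside that row,
 * "num_components" how many elements the destination needs.  With a constant
 * component, extraction starts there directly instead of at element 0. */
static void
load_ubo_vec4_model(const struct cbuf_row *cbuf, unsigned row,
                    unsigned component, unsigned num_components, float *dest)
{
   for (unsigned i = 0; i < num_components; ++i)
      dest[i] = cbuf[row].f[component + i];   /* one extract per result */
}

int main(void)
{
   struct cbuf_row cbuf[2] = { { { 0, 1, 2, 3 } }, { { 4, 5, 6, 7 } } };
   float v[2];

   /* vec2 load from row 1 starting at component 2 -> prints "6 7" */
   load_ubo_vec4_model(cbuf, 1, 2, 2, v);
   printf("%g %g\n", v[0], v[1]);
   return 0;
}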
@@ -705,12 +705,12 @@ const struct dxil_type *
 dxil_module_get_cbuf_ret_type(struct dxil_module *mod, enum overload_type overload)
 {
    const struct dxil_type *overload_type = dxil_get_overload_type(mod, overload);
-   const struct dxil_type *fields[4] = { overload_type, overload_type, overload_type, overload_type };
+   const struct dxil_type *fields[8] = { overload_type, overload_type, overload_type, overload_type,
+                                         overload_type, overload_type, overload_type, overload_type };
    unsigned num_fields;

    char name[64];
-   snprintf(name, sizeof(name), "dx.types.CBufRet.%s", dxil_overload_suffix(overload));
-
+   const char *additional = "";
    switch (overload) {
    case DXIL_I32:
    case DXIL_F32:
@@ -720,9 +720,15 @@ dxil_module_get_cbuf_ret_type(struct dxil_module *mod, enum overload_type overload)
    case DXIL_F64:
       num_fields = 2;
       break;
+   case DXIL_I16:
+   case DXIL_F16:
+      num_fields = 8;
+      additional = ".8";
+      break;
    default:
       unreachable("unexpected overload type");
    }
+   snprintf(name, sizeof(name), "dx.types.CBufRet.%s%s", dxil_overload_suffix(overload), additional);

    return dxil_module_get_struct_type(mod, name, fields, num_fields);
 }
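For orientation, a rough C picture (an illustration, not generated code) of the return aggregates built above: a cbuffer row is always 16 bytes, so the existing 32-bit variants carry four fields while the new 16-bit variants carry eight, which is where the extra ".8" suffix in names like "dx.types.CBufRet.f16.8" comes from.

#include <stdint.h>

/* Rough C picture of the 16-byte return aggregates (field names made up). */
struct dx_types_CBufRet_f32 {               /* "dx.types.CBufRet.f32"   */
   float e0, e1, e2, e3;                    /* 4 x 32-bit = 16 bytes    */
};

struct dx_types_CBufRet_f16_8 {             /* "dx.types.CBufRet.f16.8" */
   uint16_t e0, e1, e2, e3, e4, e5, e6, e7; /* 8 x 16-bit = 16 bytes,
                                               half values as raw bits  */
};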
@@ -126,6 +126,7 @@ nir_options = {
    .lower_pack_unorm_2x16 = true,
    .lower_pack_64_2x32_split = true,
    .lower_pack_32_2x16_split = true,
+   .lower_pack_64_4x16 = true,
    .lower_unpack_64_2x32_split = true,
    .lower_unpack_32_2x16_split = true,
    .lower_unpack_half_2x16 = true,
@@ -3557,47 +3558,9 @@ emit_store_ssbo_masked(struct ntd_context *ctx, nir_intrinsic_instr *intr)
 }

 static bool
-emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
+emit_load_ubo_vec4(struct ntd_context *ctx, nir_intrinsic_instr *intr)
 {
-   const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
-   if (!handle)
-      return false;
-
-   const struct dxil_value *offset;
-   nir_const_value *const_offset = nir_src_as_const_value(intr->src[1]);
-   if (const_offset) {
-      offset = dxil_module_get_int32_const(&ctx->mod, const_offset->i32 >> 4);
-   } else {
-      const struct dxil_value *offset_src = get_src(ctx, &intr->src[1], 0, nir_type_uint);
-      const struct dxil_value *c4 = dxil_module_get_int32_const(&ctx->mod, 4);
-      if (!offset_src || !c4)
-         return false;
-
-      offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ASHR, offset_src, c4, 0);
-   }
-
-   enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_float);
-   const struct dxil_value *agg = load_ubo(ctx, handle, offset, overload);
-
-   if (!agg)
-      return false;
-
-   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
-      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, agg, i);
-      store_dest(ctx, &intr->dest, i, retval);
-   }
-   if (nir_dest_bit_size(intr->dest) == 16)
-      ctx->mod.feats.native_low_precision = true;
-   return true;
-}
-
-static bool
-emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr)
-{
-   assert(nir_dest_num_components(intr->dest) <= 4);
-   assert(nir_dest_bit_size(intr->dest) == 32);
-
-   const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
+   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
    const struct dxil_value *offset =
       get_src(ctx, &intr->src[1], 0, nir_type_uint);

@@ -3609,9 +3572,11 @@ emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr)
    if (!agg)
       return false;

+   unsigned first_component = nir_intrinsic_has_component(intr) ?
+      nir_intrinsic_component(intr) : 0;
    for (unsigned i = 0; i < nir_dest_num_components(intr->dest); i++)
       store_dest(ctx, &intr->dest, i,
-                 dxil_emit_extractval(&ctx->mod, agg, i));
+                 dxil_emit_extractval(&ctx->mod, agg, i + first_component));

    if (nir_dest_bit_size(intr->dest) == 16)
       ctx->mod.feats.native_low_precision = true;
@@ -4878,10 +4843,9 @@ emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
       return emit_atomic_deref(ctx, intr);
    case nir_intrinsic_deref_atomic_swap:
       return emit_atomic_deref_swap(ctx, intr);
-   case nir_intrinsic_load_ubo:
-      return emit_load_ubo(ctx, intr);
    case nir_intrinsic_load_ubo_dxil:
-      return emit_load_ubo_dxil(ctx, intr);
+   case nir_intrinsic_load_ubo_vec4:
+      return emit_load_ubo_vec4(ctx, intr);
    case nir_intrinsic_load_primitive_id:
       return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
                                                DXIL_INTR_PRIMITIVE_ID, nir_type_int);
@@ -6527,6 +6491,9 @@ nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
    NIR_PASS_V(s, nir_lower_pack);
    NIR_PASS_V(s, dxil_nir_lower_system_values);
    NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out);
+
+   NIR_PASS_V(s, nir_lower_ubo_vec4);
+
    if (opts->shader_model_max < SHADER_MODEL_6_6) {
       /* In a later pass, load_helper_invocation will be lowered to sample mask based fallback,
        * so both load- and is- will be emulated eventually.
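The new NIR_PASS_V(s, nir_lower_ubo_vec4) call is what allows the byte-addressed path above (the deleted emit_load_ubo with its ">> 4") to go away: the common NIR pass rewrites byte-addressed load_ubo into load_ubo_vec4, whose offset counts 16-byte rows and whose component index marks the starting element inside the row. A hedged sketch of that arithmetic for an aligned load (the helper below is illustrative, not the pass's code):

/* Illustrative arithmetic only: how a byte offset maps to vec4 addressing.
 * elem_size_bytes would be 4 for 32-bit values and 2 for 16-bit values. */
static inline void
byte_offset_to_vec4_addressing(unsigned byte_offset, unsigned elem_size_bytes,
                               unsigned *row, unsigned *component)
{
   *row = byte_offset / 16;                            /* 16-byte cbuffer row */
   *component = (byte_offset % 16) / elem_size_bytes;  /* element within row  */
}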