microsoft/clc: Hook up printf

Rewrites the original lowering pass to use the one shared with Clover,
instead only handling the new load_printf_buffer_address intrinsic.

Exports the new metadata to the runtime containing strings and arg sizes.

Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8254>
This commit is contained in:
Jesse Natalie 2020-11-18 12:22:27 -08:00
parent 76788353b2
commit 4dbcf24f68
5 changed files with 79 additions and 74 deletions

View file

@ -1051,6 +1051,7 @@ clc_to_dxil(struct clc_context *ctx,
.kernel = true,
.kernel_image = true,
.literal_sampler = true,
.printf = true,
},
};
nir_shader_compiler_options nir_options =
@ -1235,9 +1236,26 @@ clc_to_dxil(struct clc_context *ctx,
// Lower memcpy
NIR_PASS_V(nir, dxil_nir_lower_memcpy_deref);
bool has_printf = false;
//NIR_PASS(has_printf, nir, clc_nir_lower_printf, uav_id);
metadata->printf_uav_id = has_printf ? uav_id++ : -1;
// Ensure the printf struct has explicit types, but we'll throw away the scratch size, because we haven't
// necessarily removed all temp variables (e.g. the printf struct itself) at this point, so we'll rerun this later
assert(nir->scratch_size == 0);
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);
nir_lower_printf_options printf_options = {
.treat_doubles_as_floats = true,
.max_buffer_size = 1024 * 1024
};
NIR_PASS_V(nir, nir_lower_printf, &printf_options);
metadata->printf.info_count = nir->printf_info_count;
metadata->printf.infos = calloc(nir->printf_info_count, sizeof(struct clc_printf_info));
for (unsigned i = 0; i < nir->printf_info_count; i++) {
metadata->printf.infos[i].str = malloc(nir->printf_info[i].string_size);
memcpy(metadata->printf.infos[i].str, nir->printf_info[i].strings, nir->printf_info[i].string_size);
metadata->printf.infos[i].num_args = nir->printf_info[i].num_args;
metadata->printf.infos[i].arg_sizes = malloc(nir->printf_info[i].num_args * sizeof(unsigned));
memcpy(metadata->printf.infos[i].arg_sizes, nir->printf_info[i].arg_sizes, nir->printf_info[i].num_args * sizeof(unsigned));
}
// copy propagate to prepare for lower_explicit_io
NIR_PASS_V(nir, nir_split_var_copies);
@ -1258,8 +1276,8 @@ clc_to_dxil(struct clc_context *ctx,
int_sampler_states, NULL, 14.0f);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL);
assert(nir->scratch_size == 0);
nir->scratch_size = 0;
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
nir_var_mem_shared | nir_var_function_temp | nir_var_uniform | nir_var_mem_global | nir_var_mem_constant,
glsl_get_cl_type_size_align);
@ -1267,6 +1285,11 @@ clc_to_dxil(struct clc_context *ctx,
NIR_PASS_V(nir, dxil_nir_lower_ubo_to_temp);
NIR_PASS_V(nir, clc_lower_constant_to_ssbo, dxil->kernel, &uav_id);
NIR_PASS_V(nir, clc_lower_global_to_ssbo);
bool has_printf = false;
NIR_PASS(has_printf, nir, clc_lower_printf_base, uav_id);
metadata->printf.uav_id = has_printf ? uav_id++ : -1;
NIR_PASS_V(nir, dxil_nir_lower_deref_ssbo);
NIR_PASS_V(nir, split_unaligned_loads_stores);
@ -1432,6 +1455,12 @@ void clc_free_dxil_object(struct clc_dxil_object *dxil)
for (unsigned i = 0; i < dxil->metadata.num_consts; i++)
free(dxil->metadata.consts[i].data);
for (unsigned i = 0; i < dxil->metadata.printf.info_count; i++) {
free(dxil->metadata.printf.infos[i].arg_sizes);
free(dxil->metadata.printf.infos[i].str);
}
free(dxil->metadata.printf.infos);
free(dxil->binary.data);
free(dxil);
}

View file

@ -118,6 +118,12 @@ struct clc_object {
#define CLC_MAX_BINDINGS_PER_ARG 3
#define CLC_MAX_SAMPLERS 16
struct clc_printf_info {
unsigned num_args;
unsigned *arg_sizes;
char *str;
};
struct clc_dxil_metadata {
struct {
unsigned offset;
@ -165,7 +171,11 @@ struct clc_dxil_metadata {
uint16_t local_size[3];
uint16_t local_size_hint[3];
int printf_uav_id;
struct {
unsigned info_count;
struct clc_printf_info *infos;
int uav_id;
} printf;
};
struct clc_dxil_object {

View file

@ -245,81 +245,48 @@ add_printf_var(struct nir_shader *nir, unsigned uav_id)
nir_variable *var =
nir_variable_create(nir, nir_var_mem_ssbo,
glsl_array_type(glsl_uint_type(), printf_array_size, sizeof(unsigned)),
"kernel_work_properies");
"printf");
var->data.binding = uav_id;
return var;
}
static void
lower_printf_impl(nir_builder *b, nir_intrinsic_instr *instr, nir_variable *var)
bool
clc_lower_printf_base(nir_shader *nir, unsigned uav_id)
{
/* Atomic add a buffer size counter to determine where to write.
* If overflowed, return -1, otherwise, store the arguments and return 0.
*/
b->cursor = nir_before_instr(&instr->instr);
nir_deref_instr *ssbo_deref = nir_build_deref_var(b, var);
nir_deref_instr *counter_deref = nir_build_deref_array_imm(b, ssbo_deref, 0);
nir_deref_instr *struct_deref = nir_instr_as_deref(instr->src[1].ssa->parent_instr);
nir_variable *struct_var = nir_deref_instr_get_variable(struct_deref);
const struct glsl_type *struct_type = struct_var->type;
/* Align the struct size to 4 for natural SSBO alignment */
int struct_size = align(glsl_get_cl_size(struct_type), 4);
nir_variable *printf_var = NULL;
nir_ssa_def *printf_deref = NULL;
nir_foreach_function(func, nir) {
nir_builder b;
nir_builder_init(&b, func->impl);
b.cursor = nir_before_instr(nir_block_first_instr(nir_start_block(func->impl)));
bool progress = false;
/* Hardcoding 64bit pointers to simplify some code below */
assert(instr->src[0].ssa->num_components == 1 && instr->src[0].ssa->bit_size == 64);
nir_foreach_block(block, func->impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (intrin->intrinsic != nir_intrinsic_load_printf_buffer_address)
continue;
nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, nir_intrinsic_deref_atomic_add);
nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL);
atomic->src[0] = nir_src_for_ssa(&counter_deref->dest.ssa);
atomic->src[1] = nir_src_for_ssa(nir_imm_int(b, struct_size + sizeof(uint64_t)));
nir_builder_instr_insert(b, &atomic->instr);
int max_valid_offset =
glsl_get_cl_size(var->type) - /* buffer size */
struct_size - /* printf args size */
sizeof(uint64_t) - /* format string */
sizeof(int); /* the first int in the buffer is for the counter */
nir_push_if(b, nir_ilt(b, &atomic->dest.ssa, nir_imm_int(b, max_valid_offset)));
nir_ssa_def *printf_succ_val = nir_imm_int(b, 0);
nir_ssa_def *start_offset = nir_u2u64(b, nir_iadd(b, &atomic->dest.ssa, nir_imm_int(b, sizeof(int))));
nir_deref_instr *as_byte_array = nir_build_deref_cast(b, &ssbo_deref->dest.ssa, nir_var_mem_ssbo, glsl_uint8_t_type(), 1);
nir_deref_instr *as_offset_byte_array = nir_build_deref_ptr_as_array(b, as_byte_array, start_offset);
nir_deref_instr *format_string_write_deref =
nir_build_deref_cast(b, &as_offset_byte_array->dest.ssa, nir_var_mem_ssbo, glsl_uint64_t_type(), 8);
nir_store_deref(b, format_string_write_deref, instr->src[0].ssa, ~0);
for (unsigned i = 0; i < glsl_get_length(struct_type); ++i) {
nir_ssa_def *field_offset_from_start = nir_imm_int64(b, glsl_get_struct_field_offset(struct_type, i) + sizeof(uint64_t));
nir_ssa_def *field_offset = nir_iadd(b, start_offset, field_offset_from_start);
const struct glsl_type *field_type = glsl_get_struct_field(struct_type, i);
nir_deref_instr *field_read_deref = nir_build_deref_struct(b, struct_deref, i);
nir_ssa_def *field_value = nir_load_deref(b, field_read_deref);
/* Clang does promotion of arguments to their "native" size. That means that any floats
* have been converted to doubles for the call to printf. Since we don't support doubles,
* convert them back here; copy-prop and other optimizations should remove all hint of doubles.
*/
if (glsl_get_base_type(field_type) == GLSL_TYPE_DOUBLE) {
field_value = nir_f2f32(b, field_value);
field_type = glsl_float_type();
if (!printf_var) {
printf_var = add_printf_var(nir, uav_id);
nir_deref_instr *deref = nir_build_deref_var(&b, printf_var);
printf_deref = &deref->dest.ssa;
}
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(printf_deref));
}
}
as_offset_byte_array = nir_build_deref_ptr_as_array(b, as_byte_array, field_offset);
nir_deref_instr *field_write_deref =
nir_build_deref_cast(b, &as_offset_byte_array->dest.ssa, nir_var_mem_ssbo, field_type, glsl_get_cl_size(field_type));
nir_store_deref(b, field_write_deref, field_value, ~0);
if (progress)
nir_metadata_preserve(func->impl, nir_metadata_loop_analysis |
nir_metadata_block_index |
nir_metadata_dominance);
else
nir_metadata_preserve(func->impl, nir_metadata_all);
}
nir_push_else(b, NULL);
nir_ssa_def *printf_fail_val = nir_imm_int(b, -1);
nir_pop_if(b, NULL);
nir_ssa_def *return_value = nir_if_phi(b, printf_succ_val, printf_fail_val);
nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(return_value));
nir_instr_remove(&instr->instr);
return printf_var != NULL;
}
static nir_variable *

View file

@ -31,8 +31,7 @@ bool
clc_nir_lower_system_values(nir_shader *nir, nir_variable *var);
bool dxil_nir_lower_kernel_input_loads(nir_shader *nir, nir_variable *var);
bool
clc_nir_lower_printf(nir_shader *nir, unsigned uav_id);
bool clc_lower_printf_base(nir_shader *nir, unsigned uav_id);
bool
clc_nir_dedupe_const_samplers(nir_shader *nir);

View file

@ -557,8 +557,8 @@ ComputeTest::run_shader_with_raw_args(Shader shader,
}
}
if (dxil->metadata.printf_uav_id > 0)
add_uav_resource(resources, 0, dxil->metadata.printf_uav_id, NULL, 1024 * 1024 / 4, 4);
if (dxil->metadata.printf.uav_id > 0)
add_uav_resource(resources, 0, dxil->metadata.printf.uav_id, NULL, 1024 * 1024 / 4, 4);
for (unsigned i = 0; i < dxil->metadata.num_consts; ++i)
add_uav_resource(resources, 0, dxil->metadata.consts[i].uav_id,