diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index 3ae95e8d5fa..229672033ed 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -109,9 +109,257 @@ nak_optimize_nir(nir_shader *nir, const struct nak_compiler *nak) void nak_preprocess_nir(nir_shader *nir, const struct nak_compiler *nak) { + UNUSED bool progress = false; + + nir_validate_ssa_dominance(nir, "before nak_preprocess_nir"); + + const nir_lower_tex_options tex_options = { + .lower_txp = ~0, + /* TODO: More lowering */ + }; + OPT(nir, nir_lower_tex, &tex_options); + + OPT(nir, nir_lower_global_vars_to_local); + + OPT(nir, nir_split_var_copies); + OPT(nir, nir_split_struct_vars, nir_var_function_temp); + + /* Optimize but allow copies because we haven't lowered them yet */ + optimize_nir(nir, nak, true /* allow_copies */); + + OPT(nir, nir_lower_load_const_to_scalar); + OPT(nir, nir_lower_var_copies); + OPT(nir, nir_lower_system_values); + OPT(nir, nir_lower_compute_system_values, NULL); +} + +static uint16_t +nak_attribute_attr_addr(gl_vert_attrib attrib) +{ + assert(attrib >= VERT_ATTRIB_GENERIC0); + return 0x80 + (attrib - VERT_ATTRIB_GENERIC0) * 0x10; +} + +static int +count_location_bytes(const struct glsl_type *type, bool bindless) +{ + return glsl_count_attribute_slots(type, false) * 16; +} + +static bool +nak_nir_lower_vs_inputs(nir_shader *nir) +{ + bool progress = false; + + nir_foreach_shader_in_variable(var, nir) { + var->data.driver_location = + nak_attribute_attr_addr(var->data.location); + } + + progress |= OPT(nir, nir_lower_io, nir_var_shader_in, count_location_bytes, + nir_lower_io_lower_64bit_to_32); + + return progress; +} + +static uint16_t +nak_varying_attr_addr(gl_varying_slot slot) +{ + if (slot >= VARYING_SLOT_PATCH0) { + return 0x020 + (slot - VARYING_SLOT_PATCH0) * 0x10; + } else if (slot >= VARYING_SLOT_VAR0) { + return 0x080 + (slot - VARYING_SLOT_VAR0) * 0x10; + } else { + switch (slot) { + case VARYING_SLOT_TESS_LEVEL_OUTER: return 0x000; + case VARYING_SLOT_TESS_LEVEL_INNER: return 0x010; + case VARYING_SLOT_PRIMITIVE_ID: return 0x060; + case VARYING_SLOT_LAYER: return 0x064; + case VARYING_SLOT_VIEWPORT: return 0x068; + case VARYING_SLOT_PSIZ: return 0x06c; + case VARYING_SLOT_POS: return 0x070; + case VARYING_SLOT_CLIP_DIST0: return 0x2c0; + case VARYING_SLOT_CLIP_DIST1: return 0x2d0; + default: unreachable("Invalid varying slot"); + } + } +} + +static bool +nak_nir_lower_varyings(nir_shader *nir, nir_variable_mode modes) +{ + bool progress = false; + + assert(!(modes & ~(nir_var_shader_in | nir_var_shader_out))); + + nir_foreach_variable_with_modes(var, nir, modes) + var->data.driver_location = nak_varying_attr_addr(var->data.location); + + progress |= OPT(nir, nir_lower_io, modes, count_location_bytes, 0); + + return progress; +} + +static int +vec_size_4(const struct glsl_type *type, bool bindless) +{ + assert(glsl_type_is_vector_or_scalar(type)); + return 4; +} + +static bool +nak_nir_lower_fs_outputs(nir_shader *nir) +{ + const uint32_t color_targets = + (nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) ? + 1 : (nir->info.outputs_written >> FRAG_RESULT_DATA0); + const bool writes_depth = + nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DEPTH); + const bool writes_sample_mask = + nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_SAMPLE_MASK); + + nir->num_outputs = util_bitcount(color_targets) * 4 + + writes_depth + writes_sample_mask; + + nir_foreach_shader_out_variable(var, nir) { + assert(nir->info.outputs_written & BITFIELD_BIT(var->data.location)); + switch (var->data.location) { + case FRAG_RESULT_DEPTH: + var->data.driver_location = util_bitcount(color_targets) * 4; + break; + case FRAG_RESULT_COLOR: + var->data.driver_location = 0; + break; + case FRAG_RESULT_SAMPLE_MASK: + var->data.driver_location = util_bitcount(color_targets) * 4; + var->data.driver_location += writes_depth; + break; + default: { + assert(var->data.location >= FRAG_RESULT_DATA0); + const unsigned out = var->data.location - FRAG_RESULT_DATA0; + var->data.driver_location = + util_bitcount(color_targets & BITFIELD_MASK(out)); + break; + } + } + } + + bool progress = nir->info.outputs_written != 0; + progress |= OPT(nir, nir_lower_io, nir_var_shader_out, vec_size_4, 0); + + return progress; +} + +static uint16_t +nak_sysval_attr_addr(gl_system_value sysval) +{ + switch (sysval) { + case SYSTEM_VALUE_FRAG_COORD: return 0x070; + case SYSTEM_VALUE_POINT_COORD: return 0x2e0; + case SYSTEM_VALUE_TESS_COORD: return 0x2f0; + case SYSTEM_VALUE_INSTANCE_ID: return 0x2f8; + case SYSTEM_VALUE_VERTEX_ID: return 0x2fc; + default: unreachable("Invalid system value"); + } +} + +static bool +nak_nir_lower_system_value_instr(nir_builder *b, nir_instr *instr, void *data) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + b->cursor = nir_before_instr(instr); + + nir_def *val; + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_load_sample_pos: { + const uint32_t addr = nak_sysval_attr_addr(SYSTEM_VALUE_FRAG_COORD); + val = nir_load_input(b, 2, 32, nir_imm_int(b, 0), .base = addr, + .dest_type = nir_type_float32); + val = nir_ffract(b, val); + break; + } + + case nir_intrinsic_load_layer_id: { + const uint32_t addr = nak_varying_attr_addr(VARYING_SLOT_LAYER); + val = nir_load_input(b, intrin->def.num_components, 32, + nir_imm_int(b, 0), .base = addr, + .dest_type = nir_type_int32); + break; + } + + case nir_intrinsic_load_frag_coord: + case nir_intrinsic_load_point_coord: + case nir_intrinsic_load_tess_coord: + case nir_intrinsic_load_instance_id: + case nir_intrinsic_load_vertex_id: { + const gl_system_value sysval = + nir_system_value_from_intrinsic(intrin->intrinsic); + const uint32_t addr = nak_sysval_attr_addr(sysval); + val = nir_load_input(b, intrin->def.num_components, 32, + nir_imm_int(b, 0), .base = addr, + .dest_type = nir_type_int32); + break; + } + + default: + return false; + } + + nir_def_rewrite_uses(&intrin->def, val); + + return true; +} + +static bool +nak_nir_lower_system_values(nir_shader *nir) +{ + return nir_shader_instructions_pass(nir, nak_nir_lower_system_value_instr, + nir_metadata_block_index | + nir_metadata_dominance, + NULL); } void nak_postprocess_nir(nir_shader *nir, const struct nak_compiler *nak) { + UNUSED bool progress = false; + + OPT(nir, nir_lower_int64); + + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + + switch (nir->info.stage) { + case MESA_SHADER_VERTEX: + OPT(nir, nak_nir_lower_vs_inputs); + OPT(nir, nak_nir_lower_varyings, nir_var_shader_out); + break; + + case MESA_SHADER_FRAGMENT: + OPT(nir, nak_nir_lower_varyings, nir_var_shader_in); + OPT(nir, nak_nir_lower_fs_outputs); + break; + + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: + break; + + default: + unreachable("Unsupported shader stage"); + } + + OPT(nir, nak_nir_lower_system_values); + + nak_optimize_nir(nir, nak); + nir_divergence_analysis(nir); + + /* Compact SSA defs because we'll use them to index arrays */ + nir_foreach_function(func, nir) { + if (func->impl) + nir_index_ssa_defs(func->impl); + } + + nir_print_shader(nir, stderr); }