nak: Add a bunch of shader lowering code in NIR

The nifty thing about the way NVIDIA hardware does inputs and outputs is
that it maps really well to how we do them in NIR.  We can make the
driver_locations match exactly to the attribute address space.  For
fragment shader outputs, we make them register numbers.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
Faith Ekstrand 2023-01-30 20:53:16 -06:00 committed by Marge Bot
parent 1b70f39ef6
commit c778d39fa4

View file

@@ -109,9 +109,257 @@ nak_optimize_nir(nir_shader *nir, const struct nak_compiler *nak)
/* Stage-independent NIR preprocessing, run once when a shader first enters
 * NAK: lowers textures, flattens global/struct variables down to
 * function-temp scalars, and runs a first optimization round.  I/O and
 * system-value lowering happens later, in nak_postprocess_nir().
 */
void
nak_preprocess_nir(nir_shader *nir, const struct nak_compiler *nak)
{
UNUSED bool progress = false;
nir_validate_ssa_dominance(nir, "before nak_preprocess_nir");
const nir_lower_tex_options tex_options = {
.lower_txp = ~0,
/* TODO: More lowering */
};
OPT(nir, nir_lower_tex, &tex_options);
OPT(nir, nir_lower_global_vars_to_local);
OPT(nir, nir_split_var_copies);
OPT(nir, nir_split_struct_vars, nir_var_function_temp);
/* Optimize but allow copies because we haven't lowered them yet */
optimize_nir(nir, nak, true /* allow_copies */);
OPT(nir, nir_lower_load_const_to_scalar);
OPT(nir, nir_lower_var_copies);
OPT(nir, nir_lower_system_values);
OPT(nir, nir_lower_compute_system_values, NULL);
}
/* Map a generic vertex attribute to its address in the hardware attribute
 * address space.  Generic attributes start at 0x80 and each slot is 0x10
 * bytes (one vec4) wide.  Non-generic attributes are not valid here.
 */
static uint16_t
nak_attribute_attr_addr(gl_vert_attrib attrib)
{
   assert(attrib >= VERT_ATTRIB_GENERIC0);
   const uint16_t generic_idx = attrib - VERT_ATTRIB_GENERIC0;
   return 0x80 + generic_idx * 0x10;
}
/* nir_lower_io type-size callback that returns sizes in bytes: one
 * attribute slot per vec4, 16 bytes each.  The bindless flag is part of
 * the callback signature but does not affect the size here.
 */
static int
count_location_bytes(const struct glsl_type *type, bool bindless)
{
   const unsigned slots = glsl_count_attribute_slots(type, false);
   return slots * 16;
}
/* Lower vertex shader inputs so driver_location is the hardware attribute
 * address, then run nir_lower_io so loads carry final byte addresses.
 */
static bool
nak_nir_lower_vs_inputs(nir_shader *nir)
{
   nir_foreach_shader_in_variable(var, nir)
      var->data.driver_location =
         nak_attribute_attr_addr(var->data.location);

   bool progress = false;
   progress |= OPT(nir, nir_lower_io, nir_var_shader_in, count_location_bytes,
                   nir_lower_io_lower_64bit_to_32);

   return progress;
}
/* Map a varying slot to its address in the attribute address space.
 *
 * Per-patch and generic varyings occupy dedicated vec4-aligned ranges;
 * the remaining slots have fixed addresses.  PATCH0 is checked before
 * VAR0 because the patch slots sit above the generic ones in the enum.
 */
static uint16_t
nak_varying_attr_addr(gl_varying_slot slot)
{
   if (slot >= VARYING_SLOT_PATCH0)
      return 0x020 + (slot - VARYING_SLOT_PATCH0) * 0x10;

   if (slot >= VARYING_SLOT_VAR0)
      return 0x080 + (slot - VARYING_SLOT_VAR0) * 0x10;

   switch (slot) {
   case VARYING_SLOT_TESS_LEVEL_OUTER:  return 0x000;
   case VARYING_SLOT_TESS_LEVEL_INNER:  return 0x010;
   case VARYING_SLOT_PRIMITIVE_ID:      return 0x060;
   case VARYING_SLOT_LAYER:             return 0x064;
   case VARYING_SLOT_VIEWPORT:          return 0x068;
   case VARYING_SLOT_PSIZ:              return 0x06c;
   case VARYING_SLOT_POS:               return 0x070;
   case VARYING_SLOT_CLIP_DIST0:        return 0x2c0;
   case VARYING_SLOT_CLIP_DIST1:        return 0x2d0;
   default: unreachable("Invalid varying slot");
   }
}
/* Lower shader_in and/or shader_out varyings: point driver_location at the
 * hardware attribute address, then run nir_lower_io so I/O intrinsics
 * carry final byte addresses.
 */
static bool
nak_nir_lower_varyings(nir_shader *nir, nir_variable_mode modes)
{
   assert(!(modes & ~(nir_var_shader_in | nir_var_shader_out)));

   nir_foreach_variable_with_modes(var, nir, modes) {
      var->data.driver_location = nak_varying_attr_addr(var->data.location);
   }

   bool progress = false;
   progress |= OPT(nir, nir_lower_io, modes, count_location_bytes, 0);

   return progress;
}
/* nir_lower_io type-size callback: every FS output location is one vec4
 * (4 dwords) regardless of the variable's actual type.  The bindless flag
 * is part of the callback signature and is unused here.
 */
static int
vec_size_4(const struct glsl_type *type, bool bindless)
{
assert(glsl_type_is_vector_or_scalar(type));
return 4;
}
/* Lower fragment shader outputs to hardware output register numbers.
 *
 * Each written color target occupies four 32-bit registers, compacted
 * over the written-target mask.  Depth comes right after the color
 * registers, followed by sample mask, each taking one register — the
 * num_outputs computation below encodes this layout.
 *
 * Bug fix: the FRAG_RESULT_DATAn case computed the compacted target index
 * but forgot to scale it by 4 registers per target, so with multiple
 * render targets DATA1 would land inside DATA0's vec4.
 */
static bool
nak_nir_lower_fs_outputs(nir_shader *nir)
{
   /* FRAG_RESULT_COLOR is a single replicated output standing in for all
    * color targets; otherwise collect per-target bits starting at DATA0.
    */
   const uint32_t color_targets =
      (nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) ?
      1 : (nir->info.outputs_written >> FRAG_RESULT_DATA0);
   const bool writes_depth =
      nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DEPTH);
   const bool writes_sample_mask =
      nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_SAMPLE_MASK);

   nir->num_outputs = util_bitcount(color_targets) * 4 +
                      writes_depth + writes_sample_mask;

   nir_foreach_shader_out_variable(var, nir) {
      assert(nir->info.outputs_written & BITFIELD_BIT(var->data.location));
      switch (var->data.location) {
      case FRAG_RESULT_DEPTH:
         /* Depth goes right after the color target registers */
         var->data.driver_location = util_bitcount(color_targets) * 4;
         break;
      case FRAG_RESULT_COLOR:
         var->data.driver_location = 0;
         break;
      case FRAG_RESULT_SAMPLE_MASK:
         /* Sample mask follows depth, when depth is written */
         var->data.driver_location = util_bitcount(color_targets) * 4;
         var->data.driver_location += writes_depth;
         break;
      default: {
         assert(var->data.location >= FRAG_RESULT_DATA0);
         const unsigned out = var->data.location - FRAG_RESULT_DATA0;
         /* Compact over the written targets below this one; each target
          * is 4 registers wide, so scale the compacted index by 4.
          */
         var->data.driver_location =
            util_bitcount(color_targets & BITFIELD_MASK(out)) * 4;
         break;
      }
      }
   }

   bool progress = nir->info.outputs_written != 0;
   progress |= OPT(nir, nir_lower_io, nir_var_shader_out, vec_size_4, 0);

   return progress;
}
/* Map a system value to its fixed address in the attribute address space.
 * Only the system values that live in the input attribute space are
 * handled; anything else is a caller bug (unreachable).
 */
static uint16_t
nak_sysval_attr_addr(gl_system_value sysval)
{
switch (sysval) {
case SYSTEM_VALUE_FRAG_COORD: return 0x070;
case SYSTEM_VALUE_POINT_COORD: return 0x2e0;
case SYSTEM_VALUE_TESS_COORD: return 0x2f0;
case SYSTEM_VALUE_INSTANCE_ID: return 0x2f8;
case SYSTEM_VALUE_VERTEX_ID: return 0x2fc;
default: unreachable("Invalid system value");
}
}
/* Per-instruction callback for nak_nir_lower_system_values: rewrites
 * system-value intrinsics into load_input intrinsics whose .base is the
 * sysval's fixed address in the attribute space (nak_sysval_attr_addr /
 * nak_varying_attr_addr).  Returns true if the instruction was rewritten.
 */
static bool
nak_nir_lower_system_value_instr(nir_builder *b, nir_instr *instr, void *data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
/* New instructions are emitted immediately before the one we replace */
b->cursor = nir_before_instr(instr);
nir_def *val;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_load_sample_pos: {
/* NOTE(review): lowers sample_pos to ffract(frag_coord.xy); this only
 * yields the per-sample position when the frag coord attribute is
 * evaluated at the sample location — confirm interpolation mode.
 */
const uint32_t addr = nak_sysval_attr_addr(SYSTEM_VALUE_FRAG_COORD);
val = nir_load_input(b, 2, 32, nir_imm_int(b, 0), .base = addr,
.dest_type = nir_type_float32);
val = nir_ffract(b, val);
break;
}
case nir_intrinsic_load_layer_id: {
/* Layer ID lives at the LAYER varying address, not a sysval slot */
const uint32_t addr = nak_varying_attr_addr(VARYING_SLOT_LAYER);
val = nir_load_input(b, intrin->def.num_components, 32,
nir_imm_int(b, 0), .base = addr,
.dest_type = nir_type_int32);
break;
}
case nir_intrinsic_load_frag_coord:
case nir_intrinsic_load_point_coord:
case nir_intrinsic_load_tess_coord:
case nir_intrinsic_load_instance_id:
case nir_intrinsic_load_vertex_id: {
/* These map 1:1 onto fixed attribute addresses */
const gl_system_value sysval =
nir_system_value_from_intrinsic(intrin->intrinsic);
const uint32_t addr = nak_sysval_attr_addr(sysval);
val = nir_load_input(b, intrin->def.num_components, 32,
nir_imm_int(b, 0), .base = addr,
.dest_type = nir_type_int32);
break;
}
default:
return false;
}
/* The old intrinsic becomes dead and is left for DCE to clean up */
nir_def_rewrite_uses(&intrin->def, val);
return true;
}
/* Rewrite system-value intrinsics into attribute loads across the whole
 * shader.  Block indices and dominance are unaffected, so preserve them.
 */
static bool
nak_nir_lower_system_values(nir_shader *nir)
{
   const nir_metadata preserved = nir_metadata_block_index |
                                  nir_metadata_dominance;
   return nir_shader_instructions_pass(nir, nak_nir_lower_system_value_instr,
                                       preserved, NULL);
}
/* Final NIR lowering before translation to NAK IR: lowers 64-bit integers,
 * rewrites stage-specific I/O variables to hardware attribute addresses /
 * output register numbers, lowers system values, then re-optimizes and
 * prepares the shader (divergence info, compact SSA indices) for backend
 * consumption.
 */
void
nak_postprocess_nir(nir_shader *nir, const struct nak_compiler *nak)
{
UNUSED bool progress = false;
OPT(nir, nir_lower_int64);
/* Refresh info (outputs_written etc.) before the I/O lowering below,
 * which depends on it.
 */
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
switch (nir->info.stage) {
case MESA_SHADER_VERTEX:
OPT(nir, nak_nir_lower_vs_inputs);
OPT(nir, nak_nir_lower_varyings, nir_var_shader_out);
break;
case MESA_SHADER_FRAGMENT:
OPT(nir, nak_nir_lower_varyings, nir_var_shader_in);
OPT(nir, nak_nir_lower_fs_outputs);
break;
case MESA_SHADER_COMPUTE:
case MESA_SHADER_KERNEL:
/* Compute has no attribute I/O to lower */
break;
default:
unreachable("Unsupported shader stage");
}
OPT(nir, nak_nir_lower_system_values);
nak_optimize_nir(nir, nak);
nir_divergence_analysis(nir);
/* Compact SSA defs because we'll use them to index arrays */
nir_foreach_function(func, nir) {
if (func->impl)
nir_index_ssa_defs(func->impl);
}
/* NOTE(review): unconditional debug dump to stderr — consider gating
 * behind a debug flag before shipping.
 */
nir_print_shader(nir, stderr);
}