diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 36103e439ae..dfe08f97df6 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -11192,7 +11192,7 @@ export_mrt(isel_context* ctx, const struct aco_export_mrt* mrt) } static bool -export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out, bool is_ps_epilog, +export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out, struct aco_export_mrt* mrt) { Builder bld(ctx->program, ctx->block); @@ -11206,11 +11206,12 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out, bool unsigned enabled_channels = 0; aco_opcode compr_op = aco_opcode::num_opcodes; bool compr = false; + bool is_16bit = values[0].regClass() == v2b; target = V_008DFC_SQ_EXP_MRT + out->slot; /* Replace NaN by zero (only 32-bit) to fix game bugs if requested. */ - if (out->enable_mrt_output_nan_fixup && + if (out->enable_mrt_output_nan_fixup && !is_16bit && (out->col_format == V_028714_SPI_SHADER_32_R || out->col_format == V_028714_SPI_SHADER_32_GR || out->col_format == V_028714_SPI_SHADER_32_AR || out->col_format == V_028714_SPI_SHADER_32_ABGR || out->col_format == V_028714_SPI_SHADER_FP16_ABGR)) { @@ -11239,90 +11240,90 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out, bool break; case V_028714_SPI_SHADER_FP16_ABGR: - if (is_ps_epilog) { - for (int i = 0; i < 2; i++) { - bool enabled = (out->write_mask >> (i * 2)) & 0x3; - if (enabled) { - enabled_channels |= 0x3 << (i * 2); - if (ctx->options->gfx_level == GFX8 || ctx->options->gfx_level == GFX9) { - values[i] = - bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, bld.def(v1), - values[i * 2].isUndefined() ? Operand::zero() : values[i * 2], - values[i * 2 + 1].isUndefined() ? Operand::zero() : values[i * 2 + 1]); - } else { - values[i] = - bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, bld.def(v1), - values[i * 2].isUndefined() ? values[i * 2 + 1] : values[i * 2], - values[i * 2 + 1].isUndefined() ? values[i * 2] : values[i * 2 + 1]); - } + for (int i = 0; i < 2; i++) { + bool enabled = (out->write_mask >> (i * 2)) & 0x3; + if (enabled) { + enabled_channels |= 0x3 << (i * 2); + if (is_16bit) { + values[i] = + bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), + values[i * 2].isUndefined() ? Operand(v2b) : values[i * 2], + values[i * 2 + 1].isUndefined() ? Operand(v2b) : values[i * 2 + 1]); + } else if (ctx->options->gfx_level == GFX8 || ctx->options->gfx_level == GFX9) { + values[i] = + bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, bld.def(v1), + values[i * 2].isUndefined() ? Operand::zero() : values[i * 2], + values[i * 2 + 1].isUndefined() ? Operand::zero() : values[i * 2 + 1]); } else { - values[i] = Operand(v1); + values[i] = + bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, bld.def(v1), + values[i * 2].isUndefined() ? values[i * 2 + 1] : values[i * 2], + values[i * 2 + 1].isUndefined() ? values[i * 2] : values[i * 2 + 1]); } + } else { + values[i] = Operand(v1); } - values[2] = Operand(v1); - values[3] = Operand(v1); - } else { - enabled_channels = util_widen_mask(out->write_mask, 2); } + values[2] = Operand(v1); + values[3] = Operand(v1); compr = true; break; case V_028714_SPI_SHADER_UNORM16_ABGR: - if (is_ps_epilog) { - compr_op = aco_opcode::v_cvt_pknorm_u16_f32; + if (is_16bit && ctx->options->gfx_level >= GFX9) { + compr_op = aco_opcode::v_cvt_pknorm_u16_f16; } else { - enabled_channels = util_widen_mask(out->write_mask, 2); - compr = true; + compr_op = aco_opcode::v_cvt_pknorm_u16_f32; } break; + case V_028714_SPI_SHADER_SNORM16_ABGR: - if (is_ps_epilog) { - compr_op = aco_opcode::v_cvt_pknorm_i16_f32; + if (is_16bit && ctx->options->gfx_level >= GFX9) { + compr_op = aco_opcode::v_cvt_pknorm_i16_f16; } else { - enabled_channels = util_widen_mask(out->write_mask, 2); - compr = true; + compr_op = aco_opcode::v_cvt_pknorm_i16_f32; } break; case V_028714_SPI_SHADER_UINT16_ABGR: - if (is_ps_epilog) { - compr_op = aco_opcode::v_cvt_pk_u16_u32; - if (out->is_int8 || out->is_int10) { - /* clamp */ - uint32_t max_rgb = out->is_int8 ? 255 : out->is_int10 ? 1023 : 0; + compr_op = aco_opcode::v_cvt_pk_u16_u32; + if (out->is_int8 || out->is_int10) { + /* clamp */ + uint32_t max_rgb = out->is_int8 ? 255 : out->is_int10 ? 1023 : 0; - u_foreach_bit(i, out->write_mask) { - uint32_t max = i == 3 && out->is_int10 ? 3 : max_rgb; + u_foreach_bit(i, out->write_mask) { + uint32_t max = i == 3 && out->is_int10 ? 3 : max_rgb; - values[i] = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), Operand::c32(max), values[i]); - } + values[i] = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), Operand::c32(max), values[i]); + } + } else if (is_16bit) { + u_foreach_bit(i, out->write_mask) { + Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, false); + values[i] = Operand(tmp); } - } else { - enabled_channels = util_widen_mask(out->write_mask, 2); - compr = true; } break; case V_028714_SPI_SHADER_SINT16_ABGR: - if (is_ps_epilog) { - compr_op = aco_opcode::v_cvt_pk_i16_i32; - if (out->is_int8 || out->is_int10) { - /* clamp */ - uint32_t max_rgb = out->is_int8 ? 127 : out->is_int10 ? 511 : 0; - uint32_t min_rgb = out->is_int8 ? -128 : out->is_int10 ? -512 : 0; + compr_op = aco_opcode::v_cvt_pk_i16_i32; + if (out->is_int8 || out->is_int10) { + /* clamp */ + uint32_t max_rgb = out->is_int8 ? 127 : out->is_int10 ? 511 : 0; + uint32_t min_rgb = out->is_int8 ? -128 : out->is_int10 ? -512 : 0; - u_foreach_bit(i, out->write_mask) { - uint32_t max = i == 3 && out->is_int10 ? 1 : max_rgb; - uint32_t min = i == 3 && out->is_int10 ? -2u : min_rgb; + u_foreach_bit(i, out->write_mask) { + uint32_t max = i == 3 && out->is_int10 ? 1 : max_rgb; + uint32_t min = i == 3 && out->is_int10 ? -2u : min_rgb; - values[i] = bld.vop2(aco_opcode::v_min_i32, bld.def(v1), Operand::c32(max), values[i]); - values[i] = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand::c32(min), values[i]); - } + values[i] = bld.vop2(aco_opcode::v_min_i32, bld.def(v1), Operand::c32(max), values[i]); + values[i] = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand::c32(min), values[i]); + } + } else if (is_16bit) { + u_foreach_bit(i, out->write_mask) { + Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, true); + values[i] = Operand(tmp); } - } else { - enabled_channels = util_widen_mask(out->write_mask, 2); - compr = true; } break; @@ -11512,7 +11513,7 @@ create_fs_exports(isel_context* ctx) } } - if (export_fs_mrt_color(ctx, &out, false, &mrts[compacted_mrt_index])) { + if (export_fs_mrt_color(ctx, &out, &mrts[compacted_mrt_index])) { compacted_mrt_index++; exported = true; } @@ -12449,7 +12450,7 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade out.values[c] = Operand(emit_extract_vector(&ctx, inputs, c, v1)); } - if (export_fs_mrt_color(&ctx, &out, true, &mrts[i])) { + if (export_fs_mrt_color(&ctx, &out, &mrts[i])) { exported_mrts |= 1 << i; } } diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 5f2df7201ef..1aa69dad0f6 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3385,173 +3385,6 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_physical_device *pdevice, return progress; } -static bool -radv_lower_fs_output(nir_shader *nir, const struct radv_pipeline_key *pipeline_key) -{ - if (pipeline_key->ps.has_epilog) - return false; - - nir_function_impl *impl = nir_shader_get_entrypoint(nir); - bool progress = false; - - nir_builder b; - nir_builder_init(&b, impl); - - nir_foreach_block(block, impl) { - nir_foreach_instr(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic != nir_intrinsic_store_output) - continue; - - int slot = nir_intrinsic_base(intrin) - FRAG_RESULT_DATA0; - if (slot < 0) - continue; - - unsigned write_mask = nir_intrinsic_write_mask(intrin); - unsigned col_format = (pipeline_key->ps.col_format >> (4 * slot)) & 0xf; - bool is_int8 = (pipeline_key->ps.is_int8 >> slot) & 1; - bool is_int10 = (pipeline_key->ps.is_int10 >> slot) & 1; - bool enable_mrt_output_nan_fixup = (pipeline_key->ps.enable_mrt_output_nan_fixup >> slot) & 1; - bool is_16bit = intrin->src[0].ssa->bit_size == 16; - - if (col_format == V_028714_SPI_SHADER_ZERO) - continue; - - b.cursor = nir_before_instr(instr); - nir_ssa_def *values[4]; - - /* Extract the export values. */ - for (unsigned i = 0; i < 4; i++) { - if (write_mask & (1 << i)) { - values[i] = nir_channel(&b, intrin->src[0].ssa, i); - } else { - values[i] = nir_ssa_undef(&b, 1, 32); - } - } - - /* Replace NaN by zero (for 32-bit float formats) to fix game bugs if requested. */ - if (enable_mrt_output_nan_fixup && !nir->info.internal && !is_16bit) { - u_foreach_bit(i, write_mask) { - const bool save_exact = b.exact; - - b.exact = true; - nir_ssa_def *isnan = nir_fneu(&b, values[i], values[i]); - b.exact = save_exact; - - values[i] = nir_bcsel(&b, isnan, nir_imm_zero(&b, 1, 32), values[i]); - } - } - - if (col_format == V_028714_SPI_SHADER_FP16_ABGR || - col_format == V_028714_SPI_SHADER_UNORM16_ABGR || - col_format == V_028714_SPI_SHADER_SNORM16_ABGR || - col_format == V_028714_SPI_SHADER_UINT16_ABGR || - col_format == V_028714_SPI_SHADER_SINT16_ABGR) { - /* Convert and/or clamp the export values. */ - switch (col_format) { - case V_028714_SPI_SHADER_UINT16_ABGR: { - unsigned max_rgb = is_int8 ? 255 : is_int10 ? 1023 : 0; - u_foreach_bit(i, write_mask) { - if (is_int8 || is_int10) { - values[i] = nir_umin(&b, values[i], i == 3 && is_int10 ? nir_imm_int(&b, 3u) - : nir_imm_int(&b, max_rgb)); - } else if (is_16bit) { - values[i] = nir_u2u32(&b, values[i]); - } - } - break; - } - case V_028714_SPI_SHADER_SINT16_ABGR: { - unsigned max_rgb = is_int8 ? 127 : is_int10 ? 511 : 0; - unsigned min_rgb = is_int8 ? -128 : is_int10 ? -512 : 0; - u_foreach_bit(i, write_mask) { - if (is_int8 || is_int10) { - values[i] = nir_imin(&b, values[i], i == 3 && is_int10 ? nir_imm_int(&b, 1u) - : nir_imm_int(&b, max_rgb)); - values[i] = nir_imax(&b, values[i], i == 3 && is_int10 ? nir_imm_int(&b, -2u) - : nir_imm_int(&b, min_rgb)); - } else if (is_16bit) { - values[i] = nir_i2i32(&b, values[i]); - } - } - break; - } - case V_028714_SPI_SHADER_UNORM16_ABGR: - case V_028714_SPI_SHADER_SNORM16_ABGR: - u_foreach_bit(i, write_mask) { - if (is_16bit) { - values[i] = nir_f2f32(&b, values[i]); - } - } - break; - default: - break; - } - - /* Only nir_pack_32_2x16_split needs 16-bit inputs. */ - bool input_16_bit = col_format == V_028714_SPI_SHADER_FP16_ABGR && is_16bit; - unsigned new_write_mask = 0; - - /* Pack the export values. */ - for (unsigned i = 0; i < 2; i++) { - bool enabled = (write_mask >> (i * 2)) & 0x3; - - if (!enabled) { - values[i] = nir_ssa_undef(&b, 1, 32); - continue; - } - - nir_ssa_def *src0 = values[i * 2]; - nir_ssa_def *src1 = values[i * 2 + 1]; - - if (!(write_mask & (1 << (i * 2)))) - src0 = nir_imm_zero(&b, 1, input_16_bit ? 16 : 32); - if (!(write_mask & (1 << (i * 2 + 1)))) - src1 = nir_imm_zero(&b, 1, input_16_bit ? 16 : 32); - - if (col_format == V_028714_SPI_SHADER_FP16_ABGR) { - if (is_16bit) { - values[i] = nir_pack_32_2x16_split(&b, src0, src1); - } else { - values[i] = nir_pack_half_2x16_split(&b, src0, src1); - } - } else if (col_format == V_028714_SPI_SHADER_UNORM16_ABGR) { - values[i] = nir_pack_unorm_2x16(&b, nir_vec2(&b, src0, src1)); - } else if (col_format == V_028714_SPI_SHADER_SNORM16_ABGR) { - values[i] = nir_pack_snorm_2x16(&b, nir_vec2(&b, src0, src1)); - } else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR) { - values[i] = nir_pack_uint_2x16(&b, nir_vec2(&b, src0, src1)); - } else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR) { - values[i] = nir_pack_sint_2x16(&b, nir_vec2(&b, src0, src1)); - } - - new_write_mask |= 1 << i; - } - - /* Update the write mask for compressed outputs. */ - nir_intrinsic_set_write_mask(intrin, new_write_mask); - intrin->num_components = util_last_bit(new_write_mask); - } - - nir_ssa_def *new_src = nir_vec(&b, values, intrin->num_components); - - nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], nir_src_for_ssa(new_src)); - - progress = true; - } - } - - if (progress) - nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); - else - nir_metadata_preserve(impl, nir_metadata_all); - - return progress; -} - void radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo, struct radv_pipeline_stage *out_stage, gl_shader_stage stage) @@ -4130,11 +3963,6 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout pipeline_key); } - if (stages[MESA_SHADER_FRAGMENT].nir && !radv_use_llvm_for_stage(device, MESA_SHADER_FRAGMENT)) { - /* TODO: Convert the LLVM backend. */ - NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_lower_fs_output, pipeline_key); - } - radv_fill_shader_info(pipeline, pipeline_layout, pipeline_key, stages); radv_declare_pipeline_args(device, stages, pipeline_key);