//# This file is a part of toml++ and is subject to the the terms of the MIT license. //# Copyright (c) Mark Gillard //# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text. // SPDX-License-Identifier: MIT #pragma once #include "preprocessor.hpp" //# {{ #if !TOML_IMPLEMENTATION #error This is an implementation-only header. #endif //# }} #if TOML_ENABLE_FORMATTERS #include "formatter.hpp" #include "print_to_stream.hpp" #include "value.hpp" #include "table.hpp" #include "array.hpp" #include "unicode.hpp" #include "parse_result.hpp" #include "header_start.hpp" TOML_IMPL_NAMESPACE_START { enum class TOML_CLOSED_FLAGS_ENUM formatted_string_traits : unsigned { none, line_breaks = 1u << 0, // \n tabs = 1u << 1, // \t control_chars = 1u << 2, // also includes non-ascii vertical whitespace single_quotes = 1u << 3, non_bare = 1u << 4, // anything not satisfying "is bare key character" non_ascii = 1u << 5, // any codepoint >= 128 all = (non_ascii << 1u) - 1u }; TOML_MAKE_FLAGS(formatted_string_traits); TOML_EXTERNAL_LINKAGE formatter::formatter(const node* source_node, const parse_result* source_pr, const formatter_constants& constants, const formatter_config& config) noexcept // #if TOML_ENABLE_PARSER && !TOML_EXCEPTIONS : source_{ source_pr && *source_pr ? &source_pr->table() : source_node }, result_{ source_pr }, #else : source_{ source_pr ? source_pr : source_node }, #endif constants_{ &constants }, config_{ config } { TOML_ASSERT_ASSUME(source_); config_.flags = (config_.flags | constants_->mandatory_flags) & ~constants_->ignored_flags; indent_columns_ = {}; for (auto c : config_.indent) indent_columns_ += c == '\t' ? 4u : 1u; int_format_mask_ = config_.flags & (format_flags::allow_binary_integers | format_flags::allow_octal_integers | format_flags::allow_hexadecimal_integers); } TOML_EXTERNAL_LINKAGE void formatter::attach(std::ostream & stream) noexcept { indent_ = {}; naked_newline_ = true; stream_ = &stream; } TOML_EXTERNAL_LINKAGE void formatter::detach() noexcept { stream_ = nullptr; } TOML_EXTERNAL_LINKAGE void formatter::print_newline(bool force) { if (!naked_newline_ || force) { print_to_stream(*stream_, '\n'); naked_newline_ = true; } } TOML_EXTERNAL_LINKAGE void formatter::print_indent() { for (int i = 0; i < indent_; i++) { print_to_stream(*stream_, config_.indent); naked_newline_ = false; } } TOML_EXTERNAL_LINKAGE void formatter::print_unformatted(char c) { print_to_stream(*stream_, c); naked_newline_ = false; } TOML_EXTERNAL_LINKAGE void formatter::print_unformatted(std::string_view str) { print_to_stream(*stream_, str); naked_newline_ = false; } TOML_EXTERNAL_LINKAGE void formatter::print_string(std::string_view str, bool allow_multi_line, bool allow_bare, bool allow_literal_whitespace) { if (str.empty()) { print_unformatted(literal_strings_allowed() ? "''"sv : "\"\""sv); return; } // pre-scan the string to determine how we should output it formatted_string_traits traits = {}; if (!allow_bare) traits |= formatted_string_traits::non_bare; bool unicode_allowed = unicode_strings_allowed(); // ascii fast path if (is_ascii(str.data(), str.length())) { for (auto c : str) { switch (c) { case '\n': traits |= formatted_string_traits::line_breaks; break; case '\t': traits |= formatted_string_traits::tabs; break; case '\'': traits |= formatted_string_traits::single_quotes; break; default: { if TOML_UNLIKELY(is_control_character(c)) traits |= formatted_string_traits::control_chars; if (!is_ascii_bare_key_character(static_cast(c))) traits |= formatted_string_traits::non_bare; break; } } static constexpr auto all_ascii_traits = formatted_string_traits::all & ~formatted_string_traits::non_ascii; if (traits == all_ascii_traits) break; } } // unicode slow path else { traits |= formatted_string_traits::non_ascii; utf8_decoder decoder; // if the unicode is malformed just treat the string as a single-line non-literal and // escape all non-ascii characters (to ensure round-tripping and help with diagnostics) const auto bad_unicode = [&]() noexcept { traits &= ~formatted_string_traits::line_breaks; traits |= formatted_string_traits::control_chars | formatted_string_traits::non_bare; unicode_allowed = false; }; for (auto c : str) { decoder(c); if TOML_UNLIKELY(decoder.error()) { bad_unicode(); break; } if (!decoder.has_code_point()) continue; switch (decoder.codepoint) { case U'\n': traits |= formatted_string_traits::line_breaks; break; case U'\t': traits |= formatted_string_traits::tabs; break; case U'\'': traits |= formatted_string_traits::single_quotes; break; default: { if TOML_UNLIKELY(is_control_character(decoder.codepoint) || is_non_ascii_vertical_whitespace(decoder.codepoint)) traits |= formatted_string_traits::control_chars; if (!is_bare_key_character(decoder.codepoint)) traits |= formatted_string_traits::non_bare; break; } } } if (decoder.needs_more_input()) bad_unicode(); } // strings with line breaks, tabs, and single-quotes can't be bare if (!!(traits & (formatted_string_traits::line_breaks | formatted_string_traits::tabs | formatted_string_traits::single_quotes))) traits |= formatted_string_traits::non_bare; // if the string meets the requirements of being 'bare' we can emit a bare string // (bare strings are composed of letters and numbers; no whitespace, control chars, quotes, etc) if (!(traits & formatted_string_traits::non_bare) && (!(traits & formatted_string_traits::non_ascii) || unicode_allowed)) { print_unformatted(str); return; } const auto real_tabs_allowed = allow_literal_whitespace && real_tabs_in_strings_allowed(); // determine if this should be a multi-line string (triple-quotes) const auto multi_line = allow_literal_whitespace // && allow_multi_line // && multi_line_strings_allowed() // && !!(traits & formatted_string_traits::line_breaks); // determine if this should be a literal string (single-quotes with no escaping) const auto literal = literal_strings_allowed() // && !(traits & formatted_string_traits::control_chars) // && (!(traits & formatted_string_traits::single_quotes) || multi_line) // && (!(traits & formatted_string_traits::tabs) || real_tabs_allowed) // && (!(traits & formatted_string_traits::line_breaks) || multi_line) // && (!(traits & formatted_string_traits::non_ascii) || unicode_allowed); // literal strings (single quotes, no escape codes) if (literal) { const auto quot = multi_line ? R"(''')"sv : R"(')"sv; print_unformatted(quot); print_unformatted(str); print_unformatted(quot); return; } // anything from here down is a non-literal string, so requires iteration and escaping. print_unformatted(multi_line ? R"(""")"sv : R"(")"sv); // ascii fast path if (!(traits & formatted_string_traits::non_ascii)) { for (auto c : str) { switch (c) { case '"': print_to_stream(*stream_, R"(\")"sv); break; case '\\': print_to_stream(*stream_, R"(\\)"sv); break; case '\x7F': print_to_stream(*stream_, R"(\u007F)"sv); break; case '\t': print_to_stream(*stream_, real_tabs_allowed ? "\t"sv : R"(\t)"sv); break; case '\n': print_to_stream(*stream_, multi_line ? "\n"sv : R"(\n)"sv); break; default: { // control characters from lookup table if TOML_UNLIKELY(c >= '\x00' && c <= '\x1F') print_to_stream(*stream_, control_char_escapes[c]); // regular characters else print_to_stream(*stream_, c); } } } } // unicode slow path else { utf8_decoder decoder; const char* cp_start = str.data(); const char* cp_end = cp_start; for (auto c : str) { decoder(c); cp_end++; // if the decoder encounters malformed unicode just emit raw bytes and if (decoder.error()) { while (cp_start != cp_end) { print_to_stream(*stream_, R"(\u00)"sv); print_to_stream(*stream_, static_cast(*cp_start), value_flags::format_as_hexadecimal, 2); cp_start++; } decoder.reset(); continue; } if (!decoder.has_code_point()) continue; switch (decoder.codepoint) { case U'"': print_to_stream(*stream_, R"(\")"sv); break; case U'\\': print_to_stream(*stream_, R"(\\)"sv); break; case U'\x7F': print_to_stream(*stream_, R"(\u007F)"sv); break; case U'\t': print_to_stream(*stream_, real_tabs_allowed ? "\t"sv : R"(\t)"sv); break; case U'\n': print_to_stream(*stream_, multi_line ? "\n"sv : R"(\n)"sv); break; default: { // control characters from lookup table if TOML_UNLIKELY(decoder.codepoint <= U'\x1F') print_to_stream(*stream_, control_char_escapes[static_cast(decoder.codepoint)]); // escaped unicode characters else if (decoder.codepoint > U'\x7F' && (!unicode_allowed || is_non_ascii_vertical_whitespace(decoder.codepoint))) { if (static_cast(decoder.codepoint) > 0xFFFFu) { print_to_stream(*stream_, R"(\U)"sv); print_to_stream(*stream_, static_cast(decoder.codepoint), value_flags::format_as_hexadecimal, 8); } else { print_to_stream(*stream_, R"(\u)"sv); print_to_stream(*stream_, static_cast(decoder.codepoint), value_flags::format_as_hexadecimal, 4); } } // regular characters else print_to_stream(*stream_, cp_start, static_cast(cp_end - cp_start)); } } cp_start = cp_end; } } print_unformatted(multi_line ? R"(""")"sv : R"(")"sv); } TOML_EXTERNAL_LINKAGE void formatter::print(const value& val) { print_string(val.get()); } TOML_EXTERNAL_LINKAGE void formatter::print(const value& val) { naked_newline_ = false; if (*val >= 0 && !!int_format_mask_) { static constexpr auto value_flags_mask = value_flags::format_as_binary | value_flags::format_as_octal | value_flags::format_as_hexadecimal; const auto fmt = val.flags() & value_flags_mask; switch (fmt) { case value_flags::format_as_binary: if (!!(int_format_mask_ & format_flags::allow_binary_integers)) { print_to_stream(*stream_, "0b"sv); print_to_stream(*stream_, *val, fmt); return; } break; case value_flags::format_as_octal: if (!!(int_format_mask_ & format_flags::allow_octal_integers)) { print_to_stream(*stream_, "0o"sv); print_to_stream(*stream_, *val, fmt); return; } break; case value_flags::format_as_hexadecimal: if (!!(int_format_mask_ & format_flags::allow_hexadecimal_integers)) { print_to_stream(*stream_, "0x"sv); print_to_stream(*stream_, *val, fmt); return; } break; default: break; } } // fallback to decimal print_to_stream(*stream_, *val); } TOML_EXTERNAL_LINKAGE void formatter::print(const value& val) { const std::string_view* inf_nan = nullptr; switch (fpclassify(*val)) { case fp_class::neg_inf: inf_nan = &constants_->float_neg_inf; break; case fp_class::pos_inf: inf_nan = &constants_->float_pos_inf; break; case fp_class::nan: inf_nan = &constants_->float_nan; break; case fp_class::ok: print_to_stream(*stream_, *val, value_flags::none, !!(config_.flags & format_flags::relaxed_float_precision)); break; default: TOML_UNREACHABLE; } if (inf_nan) { if (!!(config_.flags & format_flags::quote_infinities_and_nans)) print_to_stream_bookended(*stream_, *inf_nan, '"'); else print_to_stream(*stream_, *inf_nan); } naked_newline_ = false; } TOML_EXTERNAL_LINKAGE void formatter::print(const value& val) { print_unformatted(*val ? constants_->bool_true : constants_->bool_false); } TOML_EXTERNAL_LINKAGE void formatter::print(const value& val) { if (!!(config_.flags & format_flags::quote_dates_and_times)) print_to_stream_bookended(*stream_, *val, literal_strings_allowed() ? '\'' : '"'); else print_to_stream(*stream_, *val); naked_newline_ = false; } TOML_EXTERNAL_LINKAGE void formatter::print(const value