/*
 * mptString.h
 * -----------
 * Purpose: Small string-related utilities, number and message formatting.
 * Notes  : Currently none.
 * Authors: OpenMPT Devs
 * The OpenMPT source code is released under the BSD license. Read LICENSE for more details.
 */
#pragma once

#include "openmpt/all/BuildSettings.hpp"

#include "mpt/base/alloc.hpp"
#include "mpt/base/span.hpp"
#include "mpt/string/types.hpp"
#include "mpt/string/utility.hpp"

#include "mptBaseTypes.h"

#include <algorithm>
#include <limits>
#include <string>
#include <string_view>
#include <utility>

#include <cstring>
- OPENMPT_NAMESPACE_BEGIN
- namespace mpt
- {
- namespace String
- {
- template <typename Tstring, typename Tstring2, typename Tstring3>
- inline Tstring Replace(Tstring str, const Tstring2 &oldStr, const Tstring3 &newStr)
- {
- return mpt::replace(str, oldStr, newStr);
- }
- } // namespace String
- enum class Charset {
- UTF8,
- ASCII, // strictly 7-bit ASCII
- ISO8859_1,
- ISO8859_15,
- CP850,
- CP437,
- CP437AMS,
- CP437AMS2,
- Windows1252,
- Amiga,
- RISC_OS,
- ISO8859_1_no_C1,
- ISO8859_15_no_C1,
- Amiga_no_C1,
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- Locale, // CP_ACP on windows, current C locale otherwise
- #endif // MPT_ENABLE_CHARSET_LOCALE
- };
- // source code / preprocessor (i.e. # token)
- inline constexpr Charset CharsetSource = Charset::ASCII;
- // debug log files
- inline constexpr Charset CharsetLogfile = Charset::UTF8;
- // std::clog / std::cout / std::cerr
- #if defined(MODPLUG_TRACKER) && MPT_OS_WINDOWS && defined(MPT_ENABLE_CHARSET_LOCALE)
- inline constexpr Charset CharsetStdIO = Charset::Locale;
- #else
- inline constexpr Charset CharsetStdIO = Charset::UTF8;
- #endif
- // getenv
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- inline constexpr Charset CharsetEnvironment = Charset::Locale;
- #else
- inline constexpr Charset CharsetEnvironment = Charset::UTF8;
- #endif
- // std::exception::what()
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- inline constexpr Charset CharsetException = Charset::Locale;
- #else
- inline constexpr Charset CharsetException = Charset::UTF8;
- #endif
- // Checks if the std::string represents an UTF8 string.
- // This is currently implemented as converting to std::wstring and back assuming UTF8 both ways,
- // and comparing the result to the original string.
- // Caveats:
- // - can give false negatives because of possible unicode normalization during conversion
- // - can give false positives if the 8bit encoding contains high-ascii only in valid utf8 groups
- // - slow because of double conversion
- bool IsUTF8(const std::string &str);
- #if MPT_WSTRING_CONVERT
- // Convert to a wide character string.
- // The wide encoding is UTF-16 or UTF-32, based on sizeof(wchar_t).
- // If str does not contain any invalid characters, this conversion is lossless.
- // Invalid source bytes will be replaced by some replacement character or string.
- inline std::wstring ToWide(const std::wstring &str) { return str; }
- inline std::wstring ToWide(const wchar_t * str) { return (str ? std::wstring(str) : std::wstring()); }
- std::wstring ToWide(Charset from, const std::string &str);
- inline std::wstring ToWide(Charset from, const char * str) { return ToWide(from, str ? std::string(str) : std::string()); }
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- std::wstring ToWide(const mpt::lstring &str);
- #endif // MPT_ENABLE_CHARSET_LOCALE
- #endif
- // Convert to a string encoded in the 'to'-specified character set.
- // If str does not contain any invalid characters,
- // this conversion will be lossless iff, and only iff,
- // 'to' is UTF8.
- // Invalid source bytes or characters that are not representable in the
- // destination charset will be replaced by some replacement character or string.
- #if MPT_WSTRING_CONVERT
- std::string ToCharset(Charset to, const std::wstring &str);
- inline std::string ToCharset(Charset to, const wchar_t * str) { return ToCharset(to, str ? std::wstring(str) : std::wstring()); }
- #endif
- std::string ToCharset(Charset to, Charset from, const std::string &str);
- inline std::string ToCharset(Charset to, Charset from, const char * str) { return ToCharset(to, from, str ? std::string(str) : std::string()); }
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- std::string ToCharset(Charset to, const mpt::lstring &str);
- #endif // MPT_ENABLE_CHARSET_LOCALE
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- #if MPT_WSTRING_CONVERT
- mpt::lstring ToLocale(const std::wstring &str);
- inline mpt::lstring ToLocale(const wchar_t * str) { return ToLocale(str ? std::wstring(str): std::wstring()); }
- #endif
- mpt::lstring ToLocale(Charset from, const std::string &str);
- inline mpt::lstring ToLocale(Charset from, const char * str) { return ToLocale(from, str ? std::string(str): std::string()); }
- inline mpt::lstring ToLocale(const mpt::lstring &str) { return str; }
- #endif // MPT_ENABLE_CHARSET_LOCALE
- #if MPT_OS_WINDOWS
- #if MPT_WSTRING_CONVERT
- mpt::winstring ToWin(const std::wstring &str);
- inline mpt::winstring ToWin(const wchar_t * str) { return ToWin(str ? std::wstring(str): std::wstring()); }
- #endif
- mpt::winstring ToWin(Charset from, const std::string &str);
- inline mpt::winstring ToWin(Charset from, const char * str) { return ToWin(from, str ? std::string(str): std::string()); }
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- mpt::winstring ToWin(const mpt::lstring &str);
- #endif // MPT_ENABLE_CHARSET_LOCALE
- #endif // MPT_OS_WINDOWS
- #if defined(MPT_WITH_MFC)
- #if !(MPT_WSTRING_CONVERT)
- #error "MFC depends on MPT_WSTRING_CONVERT)"
- #endif
- // Convert to a MFC CString. The CString encoding depends on UNICODE.
- // This should also be used when converting to TCHAR strings.
- // If UNICODE is defined, this is a completely lossless operation.
- inline CString ToCString(const CString &str) { return str; }
- CString ToCString(const std::wstring &str);
- inline CString ToCString(const wchar_t * str) { return ToCString(str ? std::wstring(str) : std::wstring()); }
- CString ToCString(Charset from, const std::string &str);
- inline CString ToCString(Charset from, const char * str) { return ToCString(from, str ? std::string(str) : std::string()); }
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- CString ToCString(const mpt::lstring &str);
- mpt::lstring ToLocale(const CString &str);
- #endif // MPT_ENABLE_CHARSET_LOCALE
- #if MPT_OS_WINDOWS
- mpt::winstring ToWin(const CString &str);
- #endif // MPT_OS_WINDOWS
- // Convert from a MFC CString. The CString encoding depends on UNICODE.
- // This should also be used when converting from TCHAR strings.
- // If UNICODE is defined, this is a completely lossless operation.
- std::wstring ToWide(const CString &str);
- std::string ToCharset(Charset to, const CString &str);
- #endif // MPT_WITH_MFC
- #define UC_(x) MPT_UCHAR(x)
- #define UL_(x) MPT_ULITERAL(x)
- #define U_(x) MPT_USTRING(x)
- #if MPT_USTRING_MODE_WIDE
- #if !(MPT_WSTRING_CONVERT)
- #error "MPT_USTRING_MODE_WIDE depends on MPT_WSTRING_CONVERT)"
- #endif
- inline mpt::ustring ToUnicode(const std::wstring &str) { return str; }
- inline mpt::ustring ToUnicode(const wchar_t * str) { return (str ? std::wstring(str) : std::wstring()); }
- inline mpt::ustring ToUnicode(Charset from, const std::string &str) { return ToWide(from, str); }
- inline mpt::ustring ToUnicode(Charset from, const char * str) { return ToUnicode(from, str ? std::string(str) : std::string()); }
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- inline mpt::ustring ToUnicode(const mpt::lstring &str) { return ToWide(str); }
- #endif // MPT_ENABLE_CHARSET_LOCALE
- #if defined(MPT_WITH_MFC)
- inline mpt::ustring ToUnicode(const CString &str) { return ToWide(str); }
- #endif // MFC
- #else // !MPT_USTRING_MODE_WIDE
- inline mpt::ustring ToUnicode(const mpt::ustring &str) { return str; }
- #if MPT_WSTRING_CONVERT
- mpt::ustring ToUnicode(const std::wstring &str);
- inline mpt::ustring ToUnicode(const wchar_t * str) { return ToUnicode(str ? std::wstring(str) : std::wstring()); }
- #endif
- mpt::ustring ToUnicode(Charset from, const std::string &str);
- inline mpt::ustring ToUnicode(Charset from, const char * str) { return ToUnicode(from, str ? std::string(str) : std::string()); }
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- mpt::ustring ToUnicode(const mpt::lstring &str);
- #endif // MPT_ENABLE_CHARSET_LOCALE
- #if defined(MPT_WITH_MFC)
- mpt::ustring ToUnicode(const CString &str);
- #endif // MPT_WITH_MFC
- #endif // MPT_USTRING_MODE_WIDE
- #if MPT_USTRING_MODE_WIDE
- #if !(MPT_WSTRING_CONVERT)
- #error "MPT_USTRING_MODE_WIDE depends on MPT_WSTRING_CONVERT)"
- #endif
- // nothing, std::wstring overloads will catch all stuff
- #else // !MPT_USTRING_MODE_WIDE
- #if MPT_WSTRING_CONVERT
- std::wstring ToWide(const mpt::ustring &str);
- #endif
- std::string ToCharset(Charset to, const mpt::ustring &str);
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- mpt::lstring ToLocale(const mpt::ustring &str);
- #endif // MPT_ENABLE_CHARSET_LOCALE
- #if MPT_OS_WINDOWS
- mpt::winstring ToWin(const mpt::ustring &str);
- #endif // MPT_OS_WINDOWS
- #if defined(MPT_WITH_MFC)
- CString ToCString(const mpt::ustring &str);
- #endif // MPT_WITH_MFC
- #endif // MPT_USTRING_MODE_WIDE
- // The MPT_UTF8 allows specifying UTF8 char arrays.
- // The resulting type is mpt::ustring and the construction might require runtime translation,
- // i.e. it is NOT generally available at compile time.
- // Use explicit UTF8 encoding,
- // i.e. U+00FC (LATIN SMALL LETTER U WITH DIAERESIS) would be written as "\xC3\xBC".
- #define MPT_UTF8(x) mpt::ToUnicode(mpt::Charset::UTF8, x)
- mpt::ustring ToUnicode(uint16 codepage, mpt::Charset fallback, const std::string &str);
- char ToLowerCaseAscii(char c);
- char ToUpperCaseAscii(char c);
- std::string ToLowerCaseAscii(std::string s);
- std::string ToUpperCaseAscii(std::string s);
- int CompareNoCaseAscii(const char *a, const char *b, std::size_t n);
- int CompareNoCaseAscii(std::string_view a, std::string_view b);
- int CompareNoCaseAscii(const std::string &a, const std::string &b);
- #if defined(MODPLUG_TRACKER)
- mpt::ustring ToLowerCase(const mpt::ustring &s);
- mpt::ustring ToUpperCase(const mpt::ustring &s);
- #endif // MODPLUG_TRACKER
- } // namespace mpt
- // The AnyString types are meant to be used as function argument types only,
- // and only during the transition phase to all-unicode strings in the whole codebase.
- // Using an AnyString type as function argument avoids the need to overload a function for all the
- // different string types that we currently have.
- // Warning: These types will silently do charset conversions. Only use them when this can be tolerated.
- // BasicAnyString is convertable to mpt::ustring and constructable from any string at all.
- template <mpt::Charset charset = mpt::Charset::UTF8, bool tryUTF8 = true>
- class BasicAnyString : public mpt::ustring
- {
- private:
-
- static mpt::ustring From8bit(const std::string &str)
- {
- if constexpr(charset == mpt::Charset::UTF8)
- {
- return mpt::ToUnicode(mpt::Charset::UTF8, str);
- } else
- {
- // auto utf8 detection
- if constexpr(tryUTF8)
- {
- if(mpt::IsUTF8(str))
- {
- return mpt::ToUnicode(mpt::Charset::UTF8, str);
- } else
- {
- return mpt::ToUnicode(charset, str);
- }
- } else
- {
- return mpt::ToUnicode(charset, str);
- }
- }
- }
- public:
- // 8 bit
- BasicAnyString(const char *str) : mpt::ustring(From8bit(str ? str : std::string())) { }
- BasicAnyString(const std::string str) : mpt::ustring(From8bit(str)) { }
- // locale
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- BasicAnyString(const mpt::lstring str) : mpt::ustring(mpt::ToUnicode(str)) { }
- #endif // MPT_ENABLE_CHARSET_LOCALE
- // unicode
- BasicAnyString(const mpt::ustring &str) : mpt::ustring(str) { }
- BasicAnyString(mpt::ustring &&str) : mpt::ustring(std::move(str)) { }
- #if MPT_USTRING_MODE_UTF8 && MPT_WSTRING_CONVERT
- BasicAnyString(const std::wstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
- #endif
- #if MPT_WSTRING_CONVERT
- BasicAnyString(const wchar_t *str) : mpt::ustring(str ? mpt::ToUnicode(str) : mpt::ustring()) { }
- #endif
- // mfc
- #if defined(MPT_WITH_MFC)
- BasicAnyString(const CString &str) : mpt::ustring(mpt::ToUnicode(str)) { }
- #endif // MPT_WITH_MFC
- // fallback for custom string types
- template <typename Tstring> BasicAnyString(const Tstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
- template <typename Tstring> BasicAnyString(Tstring &&str) : mpt::ustring(mpt::ToUnicode(std::forward<Tstring>(str))) { }
- };
- // AnyUnicodeString is convertable to mpt::ustring and constructable from any unicode string,
- class AnyUnicodeString : public mpt::ustring
- {
- public:
- // locale
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- AnyUnicodeString(const mpt::lstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
- #endif // MPT_ENABLE_CHARSET_LOCALE
- // unicode
- AnyUnicodeString(const mpt::ustring &str) : mpt::ustring(str) { }
- AnyUnicodeString(mpt::ustring &&str) : mpt::ustring(std::move(str)) { }
- #if MPT_USTRING_MODE_UTF8 && MPT_WSTRING_CONVERT
- AnyUnicodeString(const std::wstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
- #endif
- #if MPT_WSTRING_CONVERT
- AnyUnicodeString(const wchar_t *str) : mpt::ustring(str ? mpt::ToUnicode(str) : mpt::ustring()) { }
- #endif
- // mfc
- #if defined(MPT_WITH_MFC)
- AnyUnicodeString(const CString &str) : mpt::ustring(mpt::ToUnicode(str)) { }
- #endif // MPT_WITH_MFC
- // fallback for custom string types
- template <typename Tstring> AnyUnicodeString(const Tstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
- template <typename Tstring> AnyUnicodeString(Tstring &&str) : mpt::ustring(mpt::ToUnicode(std::forward<Tstring>(str))) { }
- };
- // AnyString
- // Try to do the smartest auto-magic we can do.
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- using AnyString = BasicAnyString<mpt::Charset::Locale, true>;
- #elif MPT_OS_WINDOWS
- using AnyString = BasicAnyString<mpt::Charset::Windows1252, true>;
- #else
- using AnyString = BasicAnyString<mpt::Charset::ISO8859_1, true>;
- #endif
- // AnyStringLocale
- // char-based strings are assumed to be in locale encoding.
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- using AnyStringLocale = BasicAnyString<mpt::Charset::Locale, false>;
- #else
- using AnyStringLocale = BasicAnyString<mpt::Charset::UTF8, false>;
- #endif
- // AnyStringUTF8orLocale
- // char-based strings are tried in UTF8 first, if this fails, locale is used.
- #if defined(MPT_ENABLE_CHARSET_LOCALE)
- using AnyStringUTF8orLocale = BasicAnyString<mpt::Charset::Locale, true>;
- #else
- using AnyStringUTF8orLocale = BasicAnyString<mpt::Charset::UTF8, false>;
- #endif
- // AnyStringUTF8
- // char-based strings are assumed to be in UTF8.
- using AnyStringUTF8 = BasicAnyString<mpt::Charset::UTF8, false>;
- OPENMPT_NAMESPACE_END
|