4#include "../Strings/StringIterator.h"
64#if SC_PLATFORM_WINDOWS || DOXYGEN
83#if SC_PLATFORM_WINDOWS
177 template <
typename Func>
178 [[nodiscard]]
constexpr auto withIterator(Func&& func)
const;
188 template <
typename Func>
194 template <
typename StringIterator>
/// @brief Check if StringView is empty.
/// @return `true` when `text` is `nullptr` or when `textSizeInBytes` is zero, `false` otherwise
223 [[nodiscard]]
constexpr bool isEmpty()
const {
return text ==
nullptr or textSizeInBytes == 0; }
/// @brief Get size of the StringView in bytes.
/// @return The stored `textSizeInBytes` value
231 [[nodiscard]]
constexpr size_t sizeInBytes()
const {
return textSizeInBytes; }
343 template <
typename StringIterator>
350 template <
typename StringIterator>
357 template <
typename StringIterator>
544 const wchar_t* textWide;
548 static constexpr SizeType NumOptionBits = 3;
549 static constexpr SizeType MaxLength = (~static_cast<SizeType>(0)) >> NumOptionBits;
551 SizeType textSizeInBytes :
sizeof(SizeType) * 8 - NumOptionBits;
552 SizeType encoding : 2;
553 SizeType hasNullTerm : 1;
555 template <
typename T>
559 template <
typename Type>
560 constexpr StringIteratorASCII
getIterator(identity<Type>)
const;
561 constexpr StringIteratorUTF8
getIterator(identity<StringIteratorUTF8>)
const;
562 constexpr StringIteratorUTF16
getIterator(identity<StringIteratorUTF16>)
const;
563 template <
typename StringIterator1,
typename StringIterator2>
564 static constexpr bool equalsIterator(StringIterator1 t1, StringIterator2 t2,
size_t& points);
566 template <
typename StringIterator>
567 constexpr bool equalsIterator(
StringView other,
size_t& points)
const;
672 template <
typename StringIterator1,
typename StringIterator2>
673 [[nodiscard]]
static bool matchWildcardIterator(StringIterator1 pattern, StringIterator2 text);
691constexpr SC::StringView operator""_u16(
const char* txt,
size_t sz)
693 const bool isNullTerminated = sz > 0 and sz % 2 == 1 and txt[sz - 1] == 0;
698#if SC_PLATFORM_WINDOWS
699#define SC_NATIVE_STR(str) L##str
701#define SC_NATIVE_STR(str) str
705 : text(nullptr), textSizeInBytes(0), encoding(static_cast<SizeType>(
StringEncoding::
Ascii)), hasNullTerm(false)
709 : text(textSpan.data()), textSizeInBytes(static_cast<SizeType>(textSpan.sizeInBytes())),
710 encoding(static_cast<SizeType>(encoding)), hasNullTerm(nullTerm)
715template <SC::
size_t N>
717 : text(text), textSizeInBytes(N - 1), encoding(static_cast<SizeType>(
StringEncoding::
Ascii)), hasNullTerm(true)
720#if SC_PLATFORM_WINDOWS
723 : textWide(text), textSizeInBytes((N - 1) * sizeof(wchar_t)), encoding(static_cast<SizeType>(
StringEncoding::Wide)),
728 : textWide(textSpan.data()), textSizeInBytes(static_cast<SizeType>(textSpan.sizeInBytes())),
729 encoding(static_cast<SizeType>(
StringEncoding::Wide)), hasNullTerm(nullTerm)
741template <
typename StringIterator>
745 return getIterator(identity<StringIterator>());
748template <
typename Type>
755 return StringIteratorUTF8(text, text + textSizeInBytes);
759 return StringIteratorUTF16(text, text + textSizeInBytes);
762template <
typename StringIterator1,
typename StringIterator2>
763constexpr bool SC::StringView::equalsIterator(StringIterator1 t1, StringIterator2 t2,
size_t& points)
767 while (t1.advanceRead(c1) and t2.advanceRead(c2))
775 return t1.isAtEnd() and t2.isAtEnd();
778template <
typename StringIterator>
779constexpr bool SC::StringView::equalsIterator(StringView other,
size_t& points)
const
781 auto it = getIterator<StringIterator>();
782 switch (other.getEncoding())
784 case StringEncoding::Ascii:
return equalsIterator(it, other.getIterator<StringIteratorASCII>(), points);
785 case StringEncoding::Utf8:
return equalsIterator(it, other.getIterator<StringIteratorUTF8>(), points);
786 case StringEncoding::Utf16:
return equalsIterator(it, other.getIterator<StringIteratorUTF16>(), points);
788 Assert::unreachable();
793#if SC_PLATFORM_WINDOWS
795 return reinterpret_cast<const wchar_t*
>(text);
804 if (hasCompatibleEncoding(other))
806 if (textSizeInBytes != other.textSizeInBytes)
808 if (__builtin_is_constant_evaluated())
811 auto it2 = other.text;
812 auto sz = textSizeInBytes;
813 for (
size_t idx = 0; idx < sz; ++idx)
814 if (it1[idx] != it2[idx])
821 return other.textSizeInBytes == 0;
823 if (other.text ==
nullptr)
825 return textSizeInBytes == 0;
827 return memcmp(text, other.text, textSizeInBytes) == 0;
830 size_t commonOverlappingPoints = 0;
831 return fullyOverlaps(other, commonOverlappingPoints);
836 commonOverlappingPoints = 0;
837 switch (getEncoding())
839 case StringEncoding::Ascii:
return equalsIterator<StringIteratorASCII>(other, commonOverlappingPoints);
840 case StringEncoding::Utf8:
return equalsIterator<StringIteratorUTF8>(other, commonOverlappingPoints);
841 case StringEncoding::Utf16:
return equalsIterator<StringIteratorUTF16>(other, commonOverlappingPoints);
843 Assert::unreachable();
852template <
typename Func>
855 switch (getEncoding())
861 Assert::unreachable();
864template <
typename Func>
868 {
return s2.
withIterator([&it1, &func](
auto it2) {
return func(it1, it2); }); });
876template <
typename StringIterator>
885 return StringView({from.getCurrentIt(),
static_cast<size_t>(numBytes)},
false,
886 StringIterator::getEncoding());
891template <
typename StringIterator>
894 StringIterator endIt = it;
896 const size_t numBytes =
static_cast<size_t>(endIt.bytesDistanceFrom(it));
897 return StringView({it.getCurrentIt(), numBytes},
false, StringIterator::getEncoding());
900template <
typename StringIterator>
903 StringIterator start = it;
905 const size_t numBytes =
static_cast<size_t>(it.bytesDistanceFrom(start));
906 return StringView({start.getCurrentIt(), numBytes},
false, StringIterator::getEncoding());
911 if (start < sizeInBytes())
912 return sliceStartLengthBytes(start, sizeInBytes() - start);
914 return StringView({text, 0},
false, getEncoding());
920 return sliceStartLengthBytes(start, end - start);
922 return StringView({text, 0},
false, getEncoding());
927 if (start + length > sizeInBytes())
930 return StringView({text, 0},
false, getEncoding());
932 return StringView({text + start, length}, hasNullTerm and (start + length == sizeInBytes()), getEncoding());
#define SC_ASSERT_DEBUG(e)
Assert expression e to be true.
Definition: Assert.h:82
#define SC_COMPILER_EXPORT
Macro for symbol visibility in non-MSVC compilers.
Definition: Compiler.h:78
#define SC_ASSERT_RELEASE(e)
Assert expression e to be true.
Definition: Assert.h:66
int int32_t
Platform independent (4) bytes signed int.
Definition: PrimitiveTypes.h:46
unsigned long size_t
Platform independent unsigned size type.
Definition: PrimitiveTypes.h:56
signed long ssize_t
Platform independent signed size type.
Definition: PrimitiveTypes.h:57
uint32_t StringCodePoint
UTF code point (32 bit)
Definition: StringIterator.h:13
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition: StringIterator.h:302
StringEncoding
String Encoding (Ascii, Utf8, Utf16)
Definition: StringIterator.h:17
constexpr uint32_t StringEncodingGetSize(StringEncoding encoding)
Returns the number of bytes needed to represent a utf unit in the given encoding.
Definition: StringIterator.h:308
@ Ascii
Encoding is ASCII.
@ Utf16
Encoding is UTF16-LE.
View over a contiguous sequence of items (pointer + size in elements).
Definition: Span.h:21
static Span< Type > reinterpret_bytes(VoidType *rawMemory, SizeType sizeInBytes)
Construct a span reinterpreting raw memory (void* or const void*) to Type or const Type
Definition: Span.h:67
constexpr SizeType sizeInBytes() const
Size of Span in bytes.
Definition: Span.h:109
Algorithms operating on strings (glob / wildcard).
Definition: StringView.h:668
A string iterator for ASCII strings.
Definition: StringIterator.h:232
A position inside a fixed range [start, end) of UTF code points.
Definition: StringIterator.h:50
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition: StringIterator.h:412
constexpr void setToEnd()
Set current position to end of iterator range.
Definition: StringIterator.h:60
A string iterator for UTF16 strings.
Definition: StringIterator.h:251
A string iterator for UTF8 strings.
Definition: StringIterator.h:269
Non-owning view over a range of characters with UTF Encoding.
Definition: StringView.h:47
constexpr StringView(Span< const wchar_t > textSpan, bool nullTerm)
Construct a UTF16 StringView from a Span of wide characters.
bool startsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView starts with any utf code point in the given span.
Comparison
Result of ordering comparison done by StringView::compare.
Definition: StringView.h:126
constexpr StringView()
Construct an empty StringView.
Definition: StringView.h:704
constexpr bool isNullTerminated() const
Check if StringView is immediately followed by a null termination character.
Definition: StringView.h:227
Comparison compare(StringView other) const
Ordering comparison between non-normalized StringView (operates on code points, not on utf graphemes)
constexpr const char * bytesWithoutTerminator() const
Directly access the memory of this StringView.
Definition: StringView.h:97
constexpr bool operator==(StringView other) const
Compare this StringView with another StringView for equality.
Definition: StringView.h:802
constexpr StringView sliceStartEndBytes(size_t start, size_t end) const
Returns a shortened StringView taking a slice from start to end expressed in bytes.
Definition: StringView.h:917
constexpr StringIterator getIterator() const
Returns a StringIterator from current StringView.
Definition: StringView.h:742
StringView trimEndAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing ending utf code points matching the codePoints span.
bool startsWith(const StringView str) const
Check if StringView starts with another StringView.
static constexpr StringView fromIteratorFromStart(StringIterator it)
Returns a section of a string, from start of StringView to it.
Span< const uint8_t > toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
Obtain a const uint8_t Span from this StringView.
Definition: StringView.h:119
bool parseFloat(float &value) const
Try parsing current StringView as a floating point number.
constexpr StringView sliceStartLengthBytes(size_t start, size_t length) const
Returns a shortened StringView taking a slice from start ending at start+length bytes.
Definition: StringView.h:925
constexpr bool operator!=(StringView other) const
Compare this StringView with another StringView for inequality.
Definition: StringView.h:200
bool containsCodePoint(StringCodePoint c) const
Check if StringView contains given utf code point.
constexpr bool hasCompatibleEncoding(StringView str) const
Check if current StringView has compatible encoding with str.
Definition: StringView.h:871
static StringView fromIterators(StringIterator from, StringIterator to)
Returns a StringView from two iterators. The from iterator will be shortened until the start of to.
static constexpr auto withIterators(StringView s1, StringView s2, Func &&func)
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition: StringView.h:865
StringView sliceStartEnd(size_t start, size_t end) const
Get slice [start, end) starting at offset start and ending at end (measured in utf code points)
constexpr Span< const char > toCharSpan() const SC_LANGUAGE_LIFETIME_BOUND
Obtain a const char Span from this StringView.
Definition: StringView.h:115
bool endsWith(const StringView str) const
Check if StringView ends with another StringView.
StringView sliceStartLength(size_t start, size_t length) const
Get slice [start, start+length] starting at offset start and of length code points.
static StringView fromNullTerminated(const char *text, StringEncoding encoding)
Constructs a StringView from a null-terminated C-String.
StringView sliceStart(size_t offset) const
Get slice [offset, end] measured in utf code points.
auto getNullTerminatedNative() const
Directly access the memory of this null-terminated StringView.
Definition: StringView.h:791
bool endsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView ends with any utf code point in the given span.
StringView trimWhiteSpaces() const
Returns a shortened StringView without starting/ending utf code points inside {'\r',...
bool parseDouble(double &value) const
Try parsing current StringView as a double precision floating point number.
constexpr bool fullyOverlaps(StringView other, size_t &commonOverlappingPoints) const
Check if this StringView is equal to other StringView (operates on code points, not on utf graphemes)...
Definition: StringView.h:834
StringView trimStartAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting utf code points matching the codePoints span.
bool isIntegerNumber() const
If the current view is an integer number, returns true.
constexpr StringEncoding getEncoding() const
Get encoding of this StringView.
Definition: StringView.h:93
bool splitBefore(const StringView stringToMatch, StringView &stringBeforeSplit) const
Returns the part of the string before matching stringToMatch.
StringView sliceEnd(size_t offset) const
Get slice [end-offset, end] measured in utf code points.
constexpr size_t sizeInBytesIncludingTerminator() const
Get size of the StringView in bytes, including null terminator.
Definition: StringView.h:846
bool splitAfter(const StringView stringToMatch, StringView &remainingAfterSplit) const
Returns the remaining part of the string after matching stringToMatch.
static StringView fromIteratorUntilEnd(StringIterator it)
Returns a section of a string, from it to end of StringView.
constexpr StringView sliceStartBytes(size_t start) const
Returns a shortened StringView from current cutting the first start bytes.
Definition: StringView.h:909
constexpr const char * bytesIncludingTerminator() const
Directly access the memory of this null-terminated StringView.
Definition: StringView.h:735
constexpr auto withIterator(Func &&func) const
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition: StringView.h:853
constexpr bool isEmpty() const
Check if StringView is empty.
Definition: StringView.h:223
bool containsString(const StringView str) const
Check if StringView contains another StringView with compatible encoding.
bool operator<(StringView other) const
Ordering operator for StringView using StringView::compare.
Definition: StringView.h:169
constexpr size_t sizeInBytes() const
Get size of the StringView in bytes.
Definition: StringView.h:231
bool parseInt32(int32_t &value) const
Try parsing current StringView as a 32 bit integer.
StringView trimAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting and ending utf code points inside the codePoints spa...
bool isFloatingNumber() const
Check if StringView can be parsed as a floating point number.
constexpr StringView(const wchar_t(&text)[N])
Constructs a UTF16 StringView from a null-terminated wide-string literal.
Splits a StringView in tokens according to separators.
Definition: StringView.h:572
StringViewTokenizer(StringView text)
Build a tokenizer operating on the given text string view.
Definition: StringView.h:589
StringView component
Current component that has been tokenized by tokenizeNext.
Definition: StringView.h:578
bool isFinished() const
Check if the tokenizer has processed the entire string view passed in the constructor.
bool tokenizeNextLine()
Tokenizes from current position to first newline.
Definition: StringView.h:625
StringViewTokenizer & countTokens(Span< const StringCodePoint > separators)
Count the number of tokens that exist in the string view passed in constructor, when split along t...
size_t numSplitsNonEmpty
How many non-empty splits have occurred in current tokenization.
Definition: StringView.h:575
StringView processed
Substring of original string passed in constructor processed so far.
Definition: StringView.h:579
size_t numSplitsTotal
How many total splits have occurred in current tokenization.
Definition: StringView.h:576
bool tokenizeNext(Span< const StringCodePoint > separators, Options options=Options::SkipEmpty)
Splits the string along a list of separators.
StringView remaining
Substring from current position until the end of original text.
Definition: StringView.h:580
Options
Definition: StringView.h:583
@ IncludeEmpty
If tokenizeNext should also return empty tokens.
Definition: StringView.h:584
@ SkipEmpty
If to tokenizeNext should NOT return also empty tokens.
Definition: StringView.h:585
StringCodePoint splittingCharacter
The last splitting character matched in current tokenization.
Definition: StringView.h:573