4#include "../Foundation/Span.h"
5#include "../Strings/StringIterator.h"
9struct SC_COMPILER_EXPORT StringView;
10struct SC_COMPILER_EXPORT StringViewTokenizer;
11struct SC_COMPILER_EXPORT StringAlgorithms;
53 static constexpr StringView fromNullTerminated(
const char* text, StringEncoding encoding)
58#if SC_PLATFORM_WINDOWS
59 static constexpr StringView fromNullTerminated(
const wchar_t* text, StringEncoding encoding)
78 template <
typename Func>
79 [[nodiscard]]
constexpr auto withIterator(Func&& func)
const;
89 template <
typename Func>
95 template <
typename StringIterator>
/// Check if the StringView is empty.
/// @return `true` when `text` is `nullptr` or when `textSizeInBytes` is 0, `false` otherwise
124 [[nodiscard]]
constexpr bool isEmpty()
const {
return text ==
nullptr or textSizeInBytes == 0; }
/// Get the size of the StringView in bytes.
/// @return The stored `textSizeInBytes` value, unchanged
132 [[nodiscard]]
constexpr size_t sizeInBytes()
const {
return textSizeInBytes; }
235 template <
typename StringIterator>
237 StringEncoding encoding = StringIterator::getEncoding());
244 template <
typename StringIterator>
252 template <
typename StringIterator>
254 StringEncoding encoding = StringIterator::getEncoding());
420 template <
typename T>
424 template <
typename Type>
428 template <
typename StringIterator1,
typename StringIterator2>
429 static constexpr bool equalsIterator(StringIterator1 t1, StringIterator2 t2,
size_t& points);
431 template <
typename StringIterator>
432 constexpr bool equalsIterator(
StringView other,
size_t& points)
const;
531 template <
typename StringIterator1,
typename StringIterator2>
532 [[nodiscard]]
static bool matchWildcardIterator(StringIterator1 pattern, StringIterator2 text);
544 return StringView({txt, sz},
true, StringEncoding::Ascii);
546constexpr StringView
operator""_u8(
const char* txt,
size_t sz)
548 return StringView({txt, sz},
true, StringEncoding::Utf8);
550constexpr StringView
operator""_u16(
const char* txt,
size_t sz)
552 const bool isNullTerminated = sz > 0 and sz % 2 == 1 and txt[sz - 1] == 0;
553 return StringView({txt, isNullTerminated ? sz - 1 : sz}, isNullTerminated, StringEncoding::Utf16);
557template <
typename StringIterator>
564template <
typename Type>
571 return StringIteratorUTF8(text, text + textSizeInBytes);
575 return StringIteratorUTF16(text, text + textSizeInBytes);
578template <
typename StringIterator1,
typename StringIterator2>
579constexpr bool SC::StringView::equalsIterator(StringIterator1 t1, StringIterator2 t2,
size_t& points)
583 while (t1.advanceRead(c1) and t2.advanceRead(c2))
591 return t1.isAtEnd() and t2.isAtEnd();
594template <
typename StringIterator>
595constexpr bool SC::StringView::equalsIterator(StringView other,
size_t& points)
const
597 auto it = getIterator<StringIterator>();
598 switch (other.getEncoding())
600 case StringEncoding::Ascii:
return equalsIterator(it, other.getIterator<StringIteratorASCII>(), points);
601 case StringEncoding::Utf8:
return equalsIterator(it, other.getIterator<StringIteratorUTF8>(), points);
602 case StringEncoding::Utf16:
return equalsIterator(it, other.getIterator<StringIteratorUTF16>(), points);
604 Assert::unreachable();
609#if defined(__clang__)
610#pragma clang diagnostic push
611#pragma clang diagnostic ignored "-Wunreachable-code"
613 if (__builtin_is_constant_evaluated())
615 if (not hasCompatibleEncoding(other))
618 auto it2 = other.text;
619 auto sz = textSizeInBytes;
620 for (
size_t idx = 0; idx < sz; ++idx)
621 if (it1[idx] != it2[idx])
627 return StringSpan::operator==(other);
629#if defined(__clang__)
630#pragma clang diagnostic pop
636 commonOverlappingPoints = 0;
637 switch (getEncoding())
639 case StringEncoding::Ascii:
return equalsIterator<StringIteratorASCII>(other, commonOverlappingPoints);
640 case StringEncoding::Utf8:
return equalsIterator<StringIteratorUTF8>(other, commonOverlappingPoints);
641 case StringEncoding::Utf16:
return equalsIterator<StringIteratorUTF16>(other, commonOverlappingPoints);
643 Assert::unreachable();
646template <
typename Func>
649 switch (getEncoding())
651 case StringEncoding::Ascii:
return func(getIterator<StringIteratorASCII>());
652 case StringEncoding::Utf8:
return func(getIterator<StringIteratorUTF8>());
653 case StringEncoding::Utf16:
return func(getIterator<StringIteratorUTF16>());
655 Assert::unreachable();
658template <
typename Func>
662 {
return s2.
withIterator([&it1, &func](
auto it2) {
return func(it1, it2); }); });
670template <
typename StringIterator>
679 return StringView({from.it,
static_cast<size_t>(numBytes)},
false, encoding);
681 return StringView(encoding);
684template <
typename StringIterator>
687 StringIterator endIt = it;
689 const size_t numBytes =
static_cast<size_t>(endIt.bytesDistanceFrom(it));
690 return StringView({it.it, numBytes},
false, encoding);
693template <
typename StringIterator>
696 StringIterator start = it;
698 const size_t numBytes =
static_cast<size_t>(it.bytesDistanceFrom(start));
699 return StringView({start.it, numBytes},
false, encoding);
signed long ssize_t
Platform independent signed size type.
Definition PrimitiveTypes.h:57
int int32_t
Platform independent (4) bytes signed int.
Definition PrimitiveTypes.h:46
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition StringIterator.h:20
uint32_t StringCodePoint
UTF code point (32 bit)
Definition StringIterator.h:14
View over a contiguous sequence of items (pointer + size in elements).
Definition Span.h:29
static Span< Type > reinterpret_bytes(VoidType *rawMemory, SizeType sizeInBytes)
Construct a span reinterpreting raw memory (void* or const void*) to Type or const Type
Definition Span.h:80
Algorithms operating on strings (glob / wildcard).
Definition StringView.h:527
A string iterator for ASCII strings.
Definition StringIterator.h:239
A string iterator for UTF16 strings.
Definition StringIterator.h:262
A string iterator for UTF8 strings.
Definition StringIterator.h:280
A position inside a fixed range [start, end) of UTF code points.
Definition StringIterator.h:36
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition StringIterator.h:406
constexpr void setToEnd()
Set current position to end of iterator range.
Definition StringIterator.h:46
A read-only view over a string (to avoid including Strings library when parsing is not needed).
Definition StringSpan.h:37
static constexpr StringSpan fromNullTerminated(const char *text, StringEncoding encoding)
Constructs a StringView from a null terminated string.
Definition StringSpan.h:54
constexpr StringEncoding getEncoding() const
Get encoding of this StringView.
Definition StringSpan.h:98
constexpr StringSpan(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringSpan.h:41
Splits a StringView in tokens according to separators.
Definition StringView.h:437
StringViewTokenizer(StringView text)
Build a tokenizer operating on the given text string view.
Definition StringView.h:454
StringView component
Current component that has been tokenized by tokenizeNext.
Definition StringView.h:443
bool isFinished() const
Check if the tokenizer has processed the entire string view passed in the constructor.
bool tokenizeNextLine()
Tokenizes from current position to first newline.
Definition StringView.h:486
StringViewTokenizer & countTokens(Span< const StringCodePoint > separators)
Count the number of tokens that exist in the string view passed in constructor, when split along t...
size_t numSplitsNonEmpty
How many non-empty splits have occurred in current tokenization.
Definition StringView.h:440
StringView processed
Substring of original string passed in constructor processed so far.
Definition StringView.h:444
size_t numSplitsTotal
How many total splits have occurred in current tokenization.
Definition StringView.h:441
bool tokenizeNext(Span< const StringCodePoint > separators, Options options=Options::SkipEmpty)
Splits the string along a list of separators.
StringView remaining
Substring from current position until the end of original text.
Definition StringView.h:445
Options
Definition StringView.h:448
@ IncludeEmpty
If tokenizeNext should also return empty tokens.
Definition StringView.h:449
@ SkipEmpty
If tokenizeNext should NOT return empty tokens.
Definition StringView.h:450
StringCodePoint splittingCharacter
The last splitting character matched in current tokenization.
Definition StringView.h:438
Non-owning view over a range of characters with UTF Encoding.
Definition StringView.h:46
bool startsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView starts with any utf code point in the given span.
constexpr bool isNullTerminated() const
Check if StringView is immediately followed by a null termination character.
Definition StringView.h:128
constexpr StringIterator getIterator() const
Returns a StringIterator from current StringView.
Definition StringView.h:558
StringView trimEndAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing ending utf code points matching the codePoints span.
bool startsWith(const StringView str) const
Check if StringView starts with another StringView.
static constexpr StringView fromIteratorFromStart(StringIterator it, StringEncoding encoding=StringIterator::getEncoding())
Returns a section of a string, from start of StringView to it.
Span< const uint8_t > toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
Obtain a const uint8_t Span from this StringView.
Definition StringView.h:67
bool parseFloat(float &value) const
Try parsing current StringView as a floating point number.
constexpr bool operator!=(StringView other) const
Compare this StringView with another StringView for inequality.
Definition StringView.h:101
bool containsCodePoint(StringCodePoint c) const
Check if StringView contains given utf code point.
constexpr bool hasCompatibleEncoding(StringView str) const
Check if current StringView has compatible encoding with str.
Definition StringView.h:665
static constexpr auto withIterators(StringView s1, StringView s2, Func &&func)
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:659
constexpr StringSpan(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringSpan.h:41
StringView sliceStartEnd(size_t start, size_t end) const
Get slice [start, end) starting at offset start and ending at end (measured in utf code points)
bool endsWith(const StringView str) const
Check if StringView ends with another StringView.
static StringView fromIteratorUntilEnd(StringIterator it, StringEncoding encoding=StringIterator::getEncoding())
Returns a section of a string, from it to end of StringView.
StringView sliceStartLength(size_t start, size_t length) const
Get slice [start, start+length] starting at offset start and of length code points.
StringView sliceStart(size_t offset) const
Get slice [offset, end] measured in utf code points.
bool endsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView ends with any utf code point in the given span.
StringView trimWhiteSpaces() const
Returns a shortened StringView without starting/ending utf code points inside {'\r',...
bool parseDouble(double &value) const
Try parsing current StringView as a double precision floating point number.
constexpr bool fullyOverlaps(StringView other, size_t &commonOverlappingPoints) const
Check if this StringView is equal to other StringView (operates on code points, not on utf graphemes)...
Definition StringView.h:634
StringView trimStartAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting utf code points matching the codePoints span.
bool isIntegerNumber() const
If the current view is an integer number, returns true.
bool splitBefore(const StringView stringToMatch, StringView &stringBeforeSplit) const
Returns the part of the string before matching stringToMatch.
StringView sliceEnd(size_t offset) const
Get slice [end-offset, end] measured in utf code points.
bool splitAfter(const StringView stringToMatch, StringView &remainingAfterSplit) const
Returns the remaining part of the string after matching stringToMatch.
constexpr auto withIterator(Func &&func) const
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:647
constexpr bool isEmpty() const
Check if StringView is empty.
Definition StringView.h:124
bool containsString(const StringView str) const
Check if StringView contains another StringView.
constexpr size_t sizeInBytes() const
Get size of the StringView in bytes.
Definition StringView.h:132
bool parseInt32(int32_t &value) const
Try parsing current StringView as a 32 bit integer.
StringView trimAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting and ending utf code points inside the codePoints spa...
bool isFloatingNumber() const
Check if StringView can be parsed as a floating point number.
constexpr bool operator==(StringSpan other) const
Compare this StringView with another StringView for equality.
Definition StringView.h:607
static StringView fromIterators(StringIterator from, StringIterator to, StringEncoding encoding=StringIterator::getEncoding())
Returns a StringView starting at from and ending at to.