4#include "../Foundation/Span.h"
5#include "../Strings/StringIterator.h"
9struct SC_COMPILER_EXPORT StringView;
10struct SC_COMPILER_EXPORT StringViewTokenizer;
11struct SC_COMPILER_EXPORT StringAlgorithms;
53 static constexpr StringView fromNullTerminated(
const char* text, StringEncoding encoding)
58#if SC_PLATFORM_WINDOWS
59 static constexpr StringView fromNullTerminated(
const wchar_t* text, StringEncoding encoding)
134 template <
typename Func>
135 [[nodiscard]]
constexpr auto withIterator(Func&& func)
const;
145 template <
typename Func>
151 template <
typename StringIterator>
/// @brief Check if this view is empty.
/// @return `true` when `text` is `nullptr` or `textSizeInBytes` is 0, `false` otherwise.
180 [[nodiscard]]
constexpr bool isEmpty()
const {
return text ==
nullptr or textSizeInBytes == 0; }
/// @brief Get the size of this view in bytes.
/// @return The stored `textSizeInBytes` value (a byte count, returned unchanged).
188 [[nodiscard]]
constexpr size_t sizeInBytes()
const {
return textSizeInBytes; }
290 template <
typename StringIterator>
297 template <
typename StringIterator>
304 template <
typename StringIterator>
471 template <
typename T>
475 template <
typename Type>
479 template <
typename StringIterator1,
typename StringIterator2>
480 static constexpr bool equalsIterator(StringIterator1 t1, StringIterator2 t2,
size_t& points);
482 template <
typename StringIterator>
483 constexpr bool equalsIterator(
StringView other,
size_t& points)
const;
582 template <
typename StringIterator1,
typename StringIterator2>
583 [[nodiscard]]
static bool matchWildcardIterator(StringIterator1 pattern, StringIterator2 text);
595 return StringView({txt, sz},
true, StringEncoding::Ascii);
597constexpr StringView
operator""_u8(
const char* txt,
size_t sz)
599 return StringView({txt, sz},
true, StringEncoding::Utf8);
601constexpr StringView
operator""_u16(
const char* txt,
size_t sz)
603 const bool isNullTerminated = sz > 0 and sz % 2 == 1 and txt[sz - 1] == 0;
604 return StringView({txt, isNullTerminated ? sz - 1 : sz}, isNullTerminated, StringEncoding::Utf16);
608template <
typename StringIterator>
615template <
typename Type>
622 return StringIteratorUTF8(text, text + textSizeInBytes);
626 return StringIteratorUTF16(text, text + textSizeInBytes);
629template <
typename StringIterator1,
typename StringIterator2>
630constexpr bool SC::StringView::equalsIterator(StringIterator1 t1, StringIterator2 t2,
size_t& points)
634 while (t1.advanceRead(c1) and t2.advanceRead(c2))
642 return t1.isAtEnd() and t2.isAtEnd();
645template <
typename StringIterator>
646constexpr bool SC::StringView::equalsIterator(StringView other,
size_t& points)
const
648 auto it = getIterator<StringIterator>();
649 switch (other.getEncoding())
651 case StringEncoding::Ascii:
return equalsIterator(it, other.getIterator<StringIteratorASCII>(), points);
652 case StringEncoding::Utf8:
return equalsIterator(it, other.getIterator<StringIteratorUTF8>(), points);
653 case StringEncoding::Utf16:
return equalsIterator(it, other.getIterator<StringIteratorUTF16>(), points);
655 Assert::unreachable();
660#if SC_PLATFORM_WINDOWS
662 return reinterpret_cast<const wchar_t*
>(text);
664 SC_ASSERT_RELEASE(hasNullTerm && (getEncoding() == StringEncoding::Utf8 || getEncoding() == StringEncoding::Ascii));
671#if defined(__clang__)
672#pragma clang diagnostic push
673#pragma clang diagnostic ignored "-Wunreachable-code"
675 if (hasCompatibleEncoding(other))
677 if (textSizeInBytes != other.textSizeInBytes)
679 if (__builtin_is_constant_evaluated())
682 auto it2 = other.text;
683 auto sz = textSizeInBytes;
684 for (
size_t idx = 0; idx < sz; ++idx)
685 if (it1[idx] != it2[idx])
692 return other.textSizeInBytes == 0;
694 if (other.text ==
nullptr)
696 return textSizeInBytes == 0;
698 return memcmp(text, other.text, textSizeInBytes) == 0;
701 size_t commonOverlappingPoints = 0;
702 return fullyOverlaps(other, commonOverlappingPoints);
703#if defined(__clang__)
704#pragma clang diagnostic pop
710 commonOverlappingPoints = 0;
711 switch (getEncoding())
713 case StringEncoding::Ascii:
return equalsIterator<StringIteratorASCII>(other, commonOverlappingPoints);
714 case StringEncoding::Utf8:
return equalsIterator<StringIteratorUTF8>(other, commonOverlappingPoints);
715 case StringEncoding::Utf16:
return equalsIterator<StringIteratorUTF16>(other, commonOverlappingPoints);
717 Assert::unreachable();
720template <
typename Func>
723 switch (getEncoding())
725 case StringEncoding::Ascii:
return func(getIterator<StringIteratorASCII>());
726 case StringEncoding::Utf8:
return func(getIterator<StringIteratorUTF8>());
727 case StringEncoding::Utf16:
return func(getIterator<StringIteratorUTF16>());
729 Assert::unreachable();
732template <
typename Func>
736 {
return s2.
withIterator([&it1, &func](
auto it2) {
return func(it1, it2); }); });
744template <
typename StringIterator>
753 return StringView({from.it,
static_cast<size_t>(numBytes)},
false, StringIterator::getEncoding());
758template <
typename StringIterator>
761 StringIterator endIt = it;
763 const size_t numBytes =
static_cast<size_t>(endIt.bytesDistanceFrom(it));
764 return StringView({it.it, numBytes},
false, StringIterator::getEncoding());
767template <
typename StringIterator>
770 StringIterator start = it;
772 const size_t numBytes =
static_cast<size_t>(it.bytesDistanceFrom(start));
773 return StringView({start.it, numBytes},
false, StringIterator::getEncoding());
#define SC_ASSERT_RELEASE(e)
Assert expression e to be true.
Definition Assert.h:42
signed long ssize_t
Platform independent signed size type.
Definition PrimitiveTypes.h:57
int int32_t
Platform independent (4) bytes signed int.
Definition PrimitiveTypes.h:46
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition StringIterator.h:20
uint32_t StringCodePoint
UTF code point (32 bit)
Definition StringIterator.h:14
View over a contiguous sequence of items (pointer + size in elements).
Definition Span.h:29
static Span< Type > reinterpret_bytes(VoidType *rawMemory, SizeType sizeInBytes)
Construct a span reinterpreting raw memory (void* or const void*) to Type or const Type
Definition Span.h:80
Algorithms operating on strings (glob / wildcard).
Definition StringView.h:578
A string iterator for ASCII strings.
Definition StringIterator.h:233
A string iterator for UTF16 strings.
Definition StringIterator.h:253
A string iterator for UTF8 strings.
Definition StringIterator.h:272
A position inside a fixed range [start, end) of UTF code points.
Definition StringIterator.h:36
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition StringIterator.h:399
constexpr void setToEnd()
Set current position to end of iterator range.
Definition StringIterator.h:46
A read-only view over a string (to avoid including Strings library when parsing is not needed).
Definition StringSpan.h:37
static constexpr StringSpan fromNullTerminated(const char *text, StringEncoding encoding)
Constructs a StringView from a null terminated string.
Definition StringSpan.h:54
constexpr StringEncoding getEncoding() const
Get encoding of this StringView.
Definition StringSpan.h:83
constexpr StringSpan(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringSpan.h:41
Splits a StringView in tokens according to separators.
Definition StringView.h:488
StringViewTokenizer(StringView text)
Build a tokenizer operating on the given text string view.
Definition StringView.h:505
StringView component
Current component that has been tokenized by tokenizeNext.
Definition StringView.h:494
bool isFinished() const
Check if the tokenizer has processed the entire string view passed in the constructor.
bool tokenizeNextLine()
Tokenizes from current position to first newline.
Definition StringView.h:537
StringViewTokenizer & countTokens(Span< const StringCodePoint > separators)
Count the number of tokens that exist in the string view passed in the constructor, when split along t...
size_t numSplitsNonEmpty
How many non-empty splits have occurred in current tokenization.
Definition StringView.h:491
StringView processed
Substring of original string passed in constructor processed so far.
Definition StringView.h:495
size_t numSplitsTotal
How many total splits have occurred in current tokenization.
Definition StringView.h:492
bool tokenizeNext(Span< const StringCodePoint > separators, Options options=Options::SkipEmpty)
Splits the string along a list of separators.
StringView remaining
Substring from current position until the end of original text.
Definition StringView.h:496
Options
Definition StringView.h:499
@ IncludeEmpty
Whether tokenizeNext should also return empty tokens.
Definition StringView.h:500
@ SkipEmpty
Whether tokenizeNext should NOT return empty tokens.
Definition StringView.h:501
StringCodePoint splittingCharacter
The last splitting character matched in current tokenization.
Definition StringView.h:489
Non-owning view over a range of characters with UTF Encoding.
Definition StringView.h:46
bool startsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView starts with any utf code point in the given span.
Comparison
Result of ordering comparison done by StringView::compare.
Definition StringView.h:83
constexpr bool isNullTerminated() const
Check if StringView is immediately followed by a null termination character.
Definition StringView.h:184
Comparison compare(StringView other) const
Ordering comparison between non-normalized StringView (operates on code points, not on utf graphemes)
constexpr StringIterator getIterator() const
Returns a StringIterator from current StringView.
Definition StringView.h:609
StringView trimEndAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing ending utf code points matching the codePoints span.
bool startsWith(const StringView str) const
Check if StringView starts with another StringView.
static constexpr StringView fromIteratorFromStart(StringIterator it)
Returns a section of a string, from start of StringView to it.
Span< const uint8_t > toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
Obtain a const uint8_t Span from this StringView.
Definition StringView.h:76
bool parseFloat(float &value) const
Try parsing current StringView as a floating point number.
constexpr bool operator!=(StringView other) const
Compare this StringView with another StringView for inequality.
Definition StringView.h:157
bool containsCodePoint(StringCodePoint c) const
Check if StringView contains given utf code point.
constexpr bool hasCompatibleEncoding(StringView str) const
Check if current StringView has compatible encoding with str.
Definition StringView.h:739
static StringView fromIterators(StringIterator from, StringIterator to)
Returns a StringView starting at from and ending at to.
static constexpr auto withIterators(StringView s1, StringView s2, Func &&func)
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:733
constexpr StringSpan(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringSpan.h:41
StringView sliceStartEnd(size_t start, size_t end) const
Get slice [start, end) starting at offset start and ending at end (measured in utf code points)
bool endsWith(const StringView str) const
Check if StringView ends with another StringView.
StringView sliceStartLength(size_t start, size_t length) const
Get slice [start, start+length] starting at offset start and of length code points.
StringView sliceStart(size_t offset) const
Get slice [offset, end] measured in utf code points.
auto getNullTerminatedNative() const
Directly access the memory of this null terminated-StringView.
Definition StringView.h:658
bool endsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView ends with any utf code point in the given span.
StringView trimWhiteSpaces() const
Returns a shortened StringView without starting/ending utf code points inside {'\r',...
bool parseDouble(double &value) const
Try parsing current StringView as a double precision floating point number.
constexpr bool fullyOverlaps(StringView other, size_t &commonOverlappingPoints) const
Check if this StringView is equal to other StringView (operates on code points, not on utf graphemes)...
Definition StringView.h:708
StringView trimStartAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting utf code points matching the codePoints span.
bool isIntegerNumber() const
If the current view is an integer number, returns true.
bool splitBefore(const StringView stringToMatch, StringView &stringBeforeSplit) const
Returns the part of the string before matching stringToMatch.
StringView sliceEnd(size_t offset) const
Get slice [end-offset, end] measured in utf code points.
bool splitAfter(const StringView stringToMatch, StringView &remainingAfterSplit) const
Returns the remaining part of the string after matching stringToMatch.
static StringView fromIteratorUntilEnd(StringIterator it)
Returns a section of a string, from it to end of StringView.
constexpr auto withIterator(Func &&func) const
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:721
constexpr bool isEmpty() const
Check if StringView is empty.
Definition StringView.h:180
bool containsString(const StringView str) const
Check if StringView contains another StringView.
bool operator<(StringView other) const
Ordering operator for StringView using StringView::compare.
Definition StringView.h:126
constexpr size_t sizeInBytes() const
Get size of the StringView in bytes.
Definition StringView.h:188
bool parseInt32(int32_t &value) const
Try parsing current StringView as a 32 bit integer.
StringView trimAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting and ending utf code points inside the codePoints spa...
bool isFloatingNumber() const
Check if StringView can be parsed as a floating point number.
constexpr bool operator==(StringSpan other) const
Compare this StringView with another StringView for equality.
Definition StringView.h:669