4#include "../Strings/StringIterator.h"
5#include "Libraries/Foundation/Span.h"
55 static constexpr StringView fromNullTerminated(
const char* text, StringEncoding encoding)
60#if SC_PLATFORM_WINDOWS
61 static constexpr StringView fromNullTerminated(
const wchar_t* text, StringEncoding encoding)
140 template <
typename Func>
141 [[nodiscard]]
constexpr auto withIterator(Func&& func)
const;
151 template <
typename Func>
157 template <
typename StringIterator>
/// Check if StringView is empty.
/// @return `true` when `text` is `nullptr` or `textSizeInBytes` is 0
186 [[nodiscard]]
constexpr bool isEmpty()
const {
return text ==
nullptr or textSizeInBytes == 0; }
/// Get size of the StringView in bytes.
/// @return The stored `textSizeInBytes` value
194 [[nodiscard]]
constexpr size_t sizeInBytes()
const {
return textSizeInBytes; }
306 template <
typename StringIterator>
313 template <
typename StringIterator>
320 template <
typename StringIterator>
504 template <
typename T>
508 template <
typename Type>
512 template <
typename StringIterator1,
typename StringIterator2>
513 static constexpr bool equalsIterator(StringIterator1 t1, StringIterator2 t2,
size_t& points);
515 template <
typename StringIterator>
516 constexpr bool equalsIterator(
StringView other,
size_t& points)
const;
621 template <
typename StringIterator1,
typename StringIterator2>
622 [[nodiscard]]
static bool matchWildcardIterator(StringIterator1 pattern, StringIterator2 text);
634 return StringView({txt, sz},
true, StringEncoding::Ascii);
636constexpr StringView
operator""_u8(
const char* txt,
size_t sz)
638 return StringView({txt, sz},
true, StringEncoding::Utf8);
640constexpr StringView
operator""_u16(
const char* txt,
size_t sz)
642 const bool isNullTerminated = sz > 0 and sz % 2 == 1 and txt[sz - 1] == 0;
643 return StringView({txt, isNullTerminated ? sz - 1 : sz}, isNullTerminated, StringEncoding::Utf16);
653template <
typename StringIterator>
657 return getIterator(identity<StringIterator>());
660template <
typename Type>
667 return StringIteratorUTF8(text, text + textSizeInBytes);
671 return StringIteratorUTF16(text, text + textSizeInBytes);
674template <
typename StringIterator1,
typename StringIterator2>
675constexpr bool SC::StringView::equalsIterator(StringIterator1 t1, StringIterator2 t2,
size_t& points)
679 while (t1.advanceRead(c1) and t2.advanceRead(c2))
687 return t1.isAtEnd() and t2.isAtEnd();
690template <
typename StringIterator>
691constexpr bool SC::StringView::equalsIterator(StringView other,
size_t& points)
const
693 auto it = getIterator<StringIterator>();
694 switch (other.getEncoding())
696 case StringEncoding::Ascii:
return equalsIterator(it, other.getIterator<StringIteratorASCII>(), points);
697 case StringEncoding::Utf8:
return equalsIterator(it, other.getIterator<StringIteratorUTF8>(), points);
698 case StringEncoding::Utf16:
return equalsIterator(it, other.getIterator<StringIteratorUTF16>(), points);
700 Assert::unreachable();
705#if SC_PLATFORM_WINDOWS
707 return reinterpret_cast<const wchar_t*
>(text);
709 SC_ASSERT_RELEASE(hasNullTerm && (getEncoding() == StringEncoding::Utf8 || getEncoding() == StringEncoding::Ascii));
716#if defined(__clang__)
717#pragma clang diagnostic push
718#pragma clang diagnostic ignored "-Wunreachable-code"
720 if (hasCompatibleEncoding(other))
722 if (textSizeInBytes != other.textSizeInBytes)
724 if (__builtin_is_constant_evaluated())
727 auto it2 = other.text;
728 auto sz = textSizeInBytes;
729 for (
size_t idx = 0; idx < sz; ++idx)
730 if (it1[idx] != it2[idx])
737 return other.textSizeInBytes == 0;
739 if (other.text ==
nullptr)
741 return textSizeInBytes == 0;
743 return memcmp(text, other.text, textSizeInBytes) == 0;
746 size_t commonOverlappingPoints = 0;
747 return fullyOverlaps(other, commonOverlappingPoints);
748#if defined(__clang__)
749#pragma clang diagnostic pop
755 commonOverlappingPoints = 0;
756 switch (getEncoding())
758 case StringEncoding::Ascii:
return equalsIterator<StringIteratorASCII>(other, commonOverlappingPoints);
759 case StringEncoding::Utf8:
return equalsIterator<StringIteratorUTF8>(other, commonOverlappingPoints);
760 case StringEncoding::Utf16:
return equalsIterator<StringIteratorUTF16>(other, commonOverlappingPoints);
762 Assert::unreachable();
771template <
typename Func>
774 switch (getEncoding())
776 case StringEncoding::Ascii:
return func(getIterator<StringIteratorASCII>());
777 case StringEncoding::Utf8:
return func(getIterator<StringIteratorUTF8>());
778 case StringEncoding::Utf16:
return func(getIterator<StringIteratorUTF16>());
780 Assert::unreachable();
783template <
typename Func>
787 {
return s2.
withIterator([&it1, &func](
auto it2) {
return func(it1, it2); }); });
795template <
typename StringIterator>
804 return StringView({from.it,
static_cast<size_t>(numBytes)},
false, StringIterator::getEncoding());
809template <
typename StringIterator>
812 StringIterator endIt = it;
814 const size_t numBytes =
static_cast<size_t>(endIt.bytesDistanceFrom(it));
815 return StringView({it.it, numBytes},
false, StringIterator::getEncoding());
818template <
typename StringIterator>
821 StringIterator start = it;
823 const size_t numBytes =
static_cast<size_t>(it.bytesDistanceFrom(start));
824 return StringView({start.it, numBytes},
false, StringIterator::getEncoding());
829 if (start < sizeInBytes())
830 return sliceStartLengthBytes(start, sizeInBytes() - start);
832 return StringView({text, 0},
false, getEncoding());
838 return sliceStartLengthBytes(start, end - start);
840 return StringView({text, 0},
false, getEncoding());
845 if (start + length > sizeInBytes())
848 return StringView({text, 0},
false, getEncoding());
850 return StringView({text + start, length}, hasNullTerm and (start + length == sizeInBytes()), getEncoding());
#define SC_COMPILER_EXPORT
Macro for symbol visibility in non-MSVC compilers.
Definition Compiler.h:78
#define SC_ASSERT_RELEASE(e)
Assert expression e to be true.
Definition Assert.h:66
unsigned long size_t
Platform independent unsigned size type.
Definition PrimitiveTypes.h:56
signed long ssize_t
Platform independent signed size type.
Definition PrimitiveTypes.h:57
int int32_t
Platform independent (4) bytes signed int.
Definition PrimitiveTypes.h:46
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition StringIterator.h:20
constexpr uint32_t StringEncodingGetSize(StringEncoding encoding)
Returns the number of bytes needed to represent a UTF unit in the given encoding.
Definition StringIterator.h:29
uint32_t StringCodePoint
UTF code point (32 bit)
Definition StringIterator.h:14
View over a contiguous sequence of items (pointer + size in elements).
Definition Span.h:29
static Span< Type > reinterpret_bytes(VoidType *rawMemory, SizeType sizeInBytes)
Construct a span reinterpreting raw memory (void* or const void*) to Type or const Type
Definition Span.h:86
Algorithms operating on strings (glob / wildcard).
Definition StringView.h:617
A string iterator for ASCII strings.
Definition StringIterator.h:231
A string iterator for UTF16 strings.
Definition StringIterator.h:251
A string iterator for UTF8 strings.
Definition StringIterator.h:270
A position inside a fixed range [start, end) of UTF code points.
Definition StringIterator.h:50
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition StringIterator.h:397
constexpr void setToEnd()
Set current position to end of iterator range.
Definition StringIterator.h:60
A read-only view over a string (to avoid including Strings library)
Definition StringViewData.h:31
constexpr StringViewData(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringViewData.h:35
static constexpr StringViewData fromNullTerminated(const char *text, StringEncoding encoding)
Constructs a StringView from a null terminated string.
Definition StringViewData.h:48
constexpr StringEncoding getEncoding() const
Get encoding of this StringView.
Definition StringViewData.h:73
Splits a StringView in tokens according to separators.
Definition StringView.h:521
StringViewTokenizer(StringView text)
Build a tokenizer operating on the given text string view.
Definition StringView.h:538
StringView component
Current component that has been tokenized by tokenizeNext.
Definition StringView.h:527
bool isFinished() const
Check if the tokenizer has processed the entire string view passed in the constructor.
bool tokenizeNextLine()
Tokenizes from current position to first newline.
Definition StringView.h:574
StringViewTokenizer & countTokens(Span< const StringCodePoint > separators)
Count the number of tokens that exist in the string view passed in the constructor, when split along t...
size_t numSplitsNonEmpty
How many non-empty splits have occurred in current tokenization.
Definition StringView.h:524
StringView processed
Substring of original string passed in constructor processed so far.
Definition StringView.h:528
size_t numSplitsTotal
How many total splits have occurred in current tokenization.
Definition StringView.h:525
bool tokenizeNext(Span< const StringCodePoint > separators, Options options=Options::SkipEmpty)
Splits the string along a list of separators.
StringView remaining
Substring from current position until the end of original text.
Definition StringView.h:529
Options
Definition StringView.h:532
@ IncludeEmpty
If tokenizeNext should also return empty tokens.
Definition StringView.h:533
@ SkipEmpty
If tokenizeNext should NOT return empty tokens.
Definition StringView.h:534
StringCodePoint splittingCharacter
The last splitting character matched in current tokenization.
Definition StringView.h:522
Non-owning view over a range of characters with UTF Encoding.
Definition StringView.h:48
bool startsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView starts with any utf code point in the given span.
Comparison
Result of ordering comparison done by StringView::compare.
Definition StringView.h:89
constexpr bool isNullTerminated() const
Check if StringView is immediately followed by a null termination character.
Definition StringView.h:190
Comparison compare(StringView other) const
Ordering comparison between non-normalized StringView (operates on code points, not on utf graphemes)
constexpr bool operator==(StringView other) const
Compare this StringView with another StringView for equality.
Definition StringView.h:714
constexpr StringView sliceStartEndBytes(size_t start, size_t end) const
Returns a shortened StringView taking a slice from start to end expressed in bytes.
Definition StringView.h:835
constexpr StringIterator getIterator() const
Returns a StringIterator from current StringView.
Definition StringView.h:654
StringView trimEndAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing ending utf code points matching the codePoints span.
bool startsWith(const StringView str) const
Check if StringView starts with another StringView.
static constexpr StringView fromIteratorFromStart(StringIterator it)
Returns a section of a string, from start of StringView to it.
Span< const uint8_t > toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
Obtain a const uint8_t Span from this StringView.
Definition StringView.h:82
bool parseFloat(float &value) const
Try parsing current StringView as a floating point number.
constexpr StringView sliceStartLengthBytes(size_t start, size_t length) const
Returns a shortened StringView taking a slice from start ending at start+length bytes.
Definition StringView.h:843
constexpr bool operator!=(StringView other) const
Compare this StringView with another StringView for inequality.
Definition StringView.h:163
bool containsCodePoint(StringCodePoint c) const
Check if StringView contains given utf code point.
constexpr bool hasCompatibleEncoding(StringView str) const
Check if current StringView has compatible encoding with str.
Definition StringView.h:790
static StringView fromIterators(StringIterator from, StringIterator to)
Returns a StringView from two iterators. The from iterator will be shortened until the start of to.
static constexpr auto withIterators(StringView s1, StringView s2, Func &&func)
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:784
StringView sliceStartEnd(size_t start, size_t end) const
Get slice [start, end) starting at offset start and ending at end (measured in utf code points)
constexpr StringViewData(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringViewData.h:35
bool endsWith(const StringView str) const
Check if StringView ends with another StringView.
StringView sliceStartLength(size_t start, size_t length) const
Get slice [start, start+length] starting at offset start and of length code points.
StringView sliceStart(size_t offset) const
Get slice [offset, end] measured in utf code points.
auto getNullTerminatedNative() const
Directly access the memory of this null terminated-StringView.
Definition StringView.h:703
bool endsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView ends with any utf code point in the given span.
StringView trimWhiteSpaces() const
Returns a shortened StringView without starting/ending utf code points inside {'\r',...
bool parseDouble(double &value) const
Try parsing current StringView as a double precision floating point number.
constexpr bool fullyOverlaps(StringView other, size_t &commonOverlappingPoints) const
Check if this StringView is equal to other StringView (operates on code points, not on utf graphemes)...
Definition StringView.h:753
StringView trimStartAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting utf code points matching the codePoints span.
bool isIntegerNumber() const
If the current view is an integer number, returns true.
bool splitBefore(const StringView stringToMatch, StringView &stringBeforeSplit) const
Returns the part of the string before matching stringToMatch.
StringView sliceEnd(size_t offset) const
Get slice [end-offset, end] measured in utf code points.
constexpr size_t sizeInBytesIncludingTerminator() const
Get size of the StringView in bytes, including null terminator.
Definition StringView.h:765
bool splitAfter(const StringView stringToMatch, StringView &remainingAfterSplit) const
Returns the remaining part of the string after matching stringToMatch.
static StringView fromIteratorUntilEnd(StringIterator it)
Returns a section of a string, from it to end of StringView.
constexpr StringView sliceStartBytes(size_t start) const
Returns a shortened StringView from current cutting the first start bytes.
Definition StringView.h:827
constexpr const char * bytesIncludingTerminator() const
Directly access the memory of this null terminated-StringView.
Definition StringView.h:647
constexpr auto withIterator(Func &&func) const
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:772
constexpr bool isEmpty() const
Check if StringView is empty.
Definition StringView.h:186
bool containsString(const StringView str) const
Check if StringView contains another StringView with compatible encoding.
bool operator<(StringView other) const
Ordering operator for StringView using StringView::compare.
Definition StringView.h:132
constexpr size_t sizeInBytes() const
Get size of the StringView in bytes.
Definition StringView.h:194
bool parseInt32(int32_t &value) const
Try parsing current StringView as a 32 bit integer.
StringView trimAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting and ending utf code points inside the codePoints spa...
bool isFloatingNumber() const
Check if StringView can be parsed as a floating point number.