Sane C++ Libraries
C++ Platform Abstraction Libraries
Loading...
Searching...
No Matches
StringView.h
1// Copyright (c) Stefano Cristiano
2// SPDX-License-Identifier: MIT
3#pragma once
4#include "../Strings/StringIterator.h"
5#include "Libraries/Foundation/Span.h"
6
7namespace SC
8{
9struct SC_COMPILER_EXPORT StringView;
10struct SC_COMPILER_EXPORT StringViewTokenizer;
11struct SC_COMPILER_EXPORT StringAlgorithms;
12
13} // namespace SC
14
17
20
27
48{
50
52
53 constexpr StringView(StringViewData ssv) : StringViewData(ssv) {}
54
55 static constexpr StringView fromNullTerminated(const char* text, StringEncoding encoding)
56 {
57 return StringViewData::fromNullTerminated(text, encoding);
58 }
59
60#if SC_PLATFORM_WINDOWS
61 static constexpr StringView fromNullTerminated(const wchar_t* text, StringEncoding encoding)
62 {
63 return StringViewData::fromNullTerminated(text, encoding);
64 }
65#endif
69 [[nodiscard]] constexpr const char* bytesIncludingTerminator() const;
70
78 auto getNullTerminatedNative() const;
79
82 Span<const uint8_t> toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
83 {
84 return Span<const uint8_t>::reinterpret_bytes(text, textSizeInBytes);
85 }
86
88 enum class Comparison
89 {
90 Smaller = -1,
91 Equals = 0,
92 Bigger = 1
93 };
94
114 [[nodiscard]] Comparison compare(StringView other) const;
115
132 [[nodiscard]] bool operator<(StringView other) const { return compare(other) == Comparison::Smaller; }
133
140 template <typename Func>
141 [[nodiscard]] constexpr auto withIterator(Func&& func) const;
142
151 template <typename Func>
152 [[nodiscard]] static constexpr auto withIterators(StringView s1, StringView s2, Func&& func);
153
157 template <typename StringIterator>
158 constexpr StringIterator getIterator() const;
159
163 [[nodiscard]] constexpr bool operator!=(StringView other) const { return not operator==(other); }
164
168 [[nodiscard]] constexpr bool operator==(StringView other) const;
169
182 [[nodiscard]] constexpr bool fullyOverlaps(StringView other, size_t& commonOverlappingPoints) const;
183
186 [[nodiscard]] constexpr bool isEmpty() const { return text == nullptr or textSizeInBytes == 0; }
187
190 [[nodiscard]] constexpr bool isNullTerminated() const { return hasNullTerm; }
191
194 [[nodiscard]] constexpr size_t sizeInBytes() const { return textSizeInBytes; }
195
200 [[nodiscard]] constexpr size_t sizeInBytesIncludingTerminator() const;
201
210 [[nodiscard]] bool endsWithAnyOf(Span<const StringCodePoint> codePoints) const;
211
220 [[nodiscard]] bool startsWithAnyOf(Span<const StringCodePoint> codePoints) const;
221
230 [[nodiscard]] bool startsWith(const StringView str) const;
231
240 [[nodiscard]] bool endsWith(const StringView str) const;
241
256 [[nodiscard]] bool containsString(const StringView str) const;
257
270 [[nodiscard]] bool splitAfter(const StringView stringToMatch, StringView& remainingAfterSplit) const;
271
284 [[nodiscard]] bool splitBefore(const StringView stringToMatch, StringView& stringBeforeSplit) const;
285
289 [[nodiscard]] bool containsCodePoint(StringCodePoint c) const;
290
295 [[nodiscard]] constexpr bool hasCompatibleEncoding(StringView str) const;
296
298
306 template <typename StringIterator>
308
313 template <typename StringIterator>
315
320 template <typename StringIterator>
322
334 [[nodiscard]] StringView sliceStartEnd(size_t start, size_t end) const;
335
347 [[nodiscard]] StringView sliceStartLength(size_t start, size_t length) const;
348
358 [[nodiscard]] StringView sliceStart(size_t offset) const;
359
369 [[nodiscard]] StringView sliceEnd(size_t offset) const;
370
380 [[nodiscard]] StringView trimEndAnyOf(Span<const StringCodePoint> codePoints) const;
381
392
402 [[nodiscard]] StringView trimAnyOf(Span<const StringCodePoint> codePoints) const;
403
412 [[nodiscard]] StringView trimWhiteSpaces() const;
413
417 [[nodiscard]] constexpr StringView sliceStartBytes(size_t start) const;
418
423 [[nodiscard]] constexpr StringView sliceStartEndBytes(size_t start, size_t end) const;
424
429 [[nodiscard]] constexpr StringView sliceStartLengthBytes(size_t start, size_t length) const;
430
432
442 [[nodiscard]] bool isIntegerNumber() const;
443
456 [[nodiscard]] bool isFloatingNumber() const;
457
471 [[nodiscard]] bool parseInt32(int32_t& value) const;
472
486 [[nodiscard]] bool parseFloat(float& value) const;
487
501 [[nodiscard]] bool parseDouble(double& value) const;
502
503 private:
504 template <typename T>
505 struct identity
506 {
507 };
508 template <typename Type>
509 constexpr StringIteratorASCII getIterator(identity<Type>) const;
510 constexpr StringIteratorUTF8 getIterator(identity<StringIteratorUTF8>) const;
511 constexpr StringIteratorUTF16 getIterator(identity<StringIteratorUTF16>) const;
512 template <typename StringIterator1, typename StringIterator2>
513 static constexpr bool equalsIterator(StringIterator1 t1, StringIterator2 t2, size_t& points);
514
515 template <typename StringIterator>
516 constexpr bool equalsIterator(StringView other, size_t& points) const;
517};
518
521{
523
524 size_t numSplitsNonEmpty = 0;
525 size_t numSplitsTotal = 0;
526
530
536
538 StringViewTokenizer(StringView text) : remaining(text), originalText(text) {}
539
545
555 [[nodiscard]] bool tokenizeNext(Span<const StringCodePoint> separators, Options options = Options::SkipEmpty);
556
561
574 [[nodiscard]] bool tokenizeNextLine() { return tokenizeNext({'\n'}); }
575
582
590
592 [[nodiscard]] bool isFinished() const;
593
594 private:
595 StringView originalText; // Original text as passed in the constructor
596};
597
617{
618 [[nodiscard]] static bool matchWildcard(StringView s1, StringView s2);
619
620 private:
621 template <typename StringIterator1, typename StringIterator2>
622 [[nodiscard]] static bool matchWildcardIterator(StringIterator1 pattern, StringIterator2 text);
623};
624
626
627//-----------------------------------------------------------------------------------------------------------------------
628// Implementations Details
629//-----------------------------------------------------------------------------------------------------------------------
630namespace SC
631{
632constexpr SC::StringView operator""_a8(const char* txt, size_t sz)
633{
634 return StringView({txt, sz}, true, StringEncoding::Ascii);
635}
636constexpr StringView operator""_u8(const char* txt, size_t sz)
637{
638 return StringView({txt, sz}, true, StringEncoding::Utf8);
639}
640constexpr StringView operator""_u16(const char* txt, size_t sz)
641{
642 const bool isNullTerminated = sz > 0 and sz % 2 == 1 and txt[sz - 1] == 0;
643 return StringView({txt, isNullTerminated ? sz - 1 : sz}, isNullTerminated, StringEncoding::Utf16);
644}
645} // namespace SC
646
647[[nodiscard]] constexpr const char* SC::StringView::bytesIncludingTerminator() const
648{
649 SC_ASSERT_RELEASE(hasNullTerm);
650 return text;
651}
652
653template <typename StringIterator>
655{
656 // For GCC complaining about specialization in non-namespace scope
657 return getIterator(identity<StringIterator>());
658}
659
660template <typename Type>
661constexpr SC::StringIteratorASCII SC::StringView::getIterator(identity<Type>) const
662{
663 return StringIteratorASCII(text, text + textSizeInBytes);
664}
665constexpr SC::StringIteratorUTF8 SC::StringView::getIterator(identity<StringIteratorUTF8>) const
666{
667 return StringIteratorUTF8(text, text + textSizeInBytes);
668}
669constexpr SC::StringIteratorUTF16 SC::StringView::getIterator(identity<StringIteratorUTF16>) const
670{
671 return StringIteratorUTF16(text, text + textSizeInBytes);
672}
673
674template <typename StringIterator1, typename StringIterator2>
675constexpr bool SC::StringView::equalsIterator(StringIterator1 t1, StringIterator2 t2, size_t& points)
676{
677 StringCodePoint c1 = 0;
678 StringCodePoint c2 = 0;
679 while (t1.advanceRead(c1) and t2.advanceRead(c2))
680 {
681 if (c1 != c2)
682 {
683 return false;
684 }
685 points++;
686 }
687 return t1.isAtEnd() and t2.isAtEnd();
688}
689
690template <typename StringIterator>
691constexpr bool SC::StringView::equalsIterator(StringView other, size_t& points) const
692{
693 auto it = getIterator<StringIterator>();
694 switch (other.getEncoding())
695 {
696 case StringEncoding::Ascii: return equalsIterator(it, other.getIterator<StringIteratorASCII>(), points);
697 case StringEncoding::Utf8: return equalsIterator(it, other.getIterator<StringIteratorUTF8>(), points);
698 case StringEncoding::Utf16: return equalsIterator(it, other.getIterator<StringIteratorUTF16>(), points);
699 }
700 Assert::unreachable();
701}
702
703[[nodiscard]] inline auto SC::StringView::getNullTerminatedNative() const
704{
705#if SC_PLATFORM_WINDOWS
706 SC_ASSERT_RELEASE(hasNullTerm && (getEncoding() == StringEncoding::Utf16));
707 return reinterpret_cast<const wchar_t*>(text);
708#else
709 SC_ASSERT_RELEASE(hasNullTerm && (getEncoding() == StringEncoding::Utf8 || getEncoding() == StringEncoding::Ascii));
710 return text;
711#endif
712}
713
714[[nodiscard]] constexpr bool SC::StringView::operator==(StringView other) const
715{
716#if defined(__clang__)
717#pragma clang diagnostic push
718#pragma clang diagnostic ignored "-Wunreachable-code"
719#endif
720 if (hasCompatibleEncoding(other))
721 {
722 if (textSizeInBytes != other.textSizeInBytes)
723 return false;
724 if (__builtin_is_constant_evaluated())
725 {
726 auto it1 = text;
727 auto it2 = other.text;
728 auto sz = textSizeInBytes;
729 for (size_t idx = 0; idx < sz; ++idx)
730 if (it1[idx] != it2[idx])
731 return false;
732 }
733 else
734 {
735 if (text == nullptr)
736 {
737 return other.textSizeInBytes == 0;
738 }
739 if (other.text == nullptr)
740 {
741 return textSizeInBytes == 0;
742 }
743 return memcmp(text, other.text, textSizeInBytes) == 0;
744 }
745 }
746 size_t commonOverlappingPoints = 0;
747 return fullyOverlaps(other, commonOverlappingPoints);
748#if defined(__clang__)
749#pragma clang diagnostic pop
750#endif
751}
752
753constexpr bool SC::StringView::fullyOverlaps(StringView other, size_t& commonOverlappingPoints) const
754{
755 commonOverlappingPoints = 0;
756 switch (getEncoding())
757 {
758 case StringEncoding::Ascii: return equalsIterator<StringIteratorASCII>(other, commonOverlappingPoints);
759 case StringEncoding::Utf8: return equalsIterator<StringIteratorUTF8>(other, commonOverlappingPoints);
760 case StringEncoding::Utf16: return equalsIterator<StringIteratorUTF16>(other, commonOverlappingPoints);
761 }
762 Assert::unreachable();
763}
764
766{
767 SC_ASSERT_RELEASE(hasNullTerm);
768 return textSizeInBytes > 0 ? textSizeInBytes + StringEncodingGetSize(getEncoding()) : 0;
769}
770
771template <typename Func>
772constexpr auto SC::StringView::withIterator(Func&& func) const
773{
774 switch (getEncoding())
775 {
776 case StringEncoding::Ascii: return func(getIterator<StringIteratorASCII>());
777 case StringEncoding::Utf8: return func(getIterator<StringIteratorUTF8>());
778 case StringEncoding::Utf16: return func(getIterator<StringIteratorUTF16>());
779 }
780 Assert::unreachable();
781}
782
783template <typename Func>
784constexpr auto SC::StringView::withIterators(StringView s1, StringView s2, Func&& func)
785{
786 return s1.withIterator([&s2, &func](auto it1)
787 { return s2.withIterator([&it1, &func](auto it2) { return func(it1, it2); }); });
788}
789
791{
792 return StringEncodingAreBinaryCompatible(getEncoding(), str.getEncoding());
793}
794
795template <typename StringIterator>
797{
798 const ssize_t numBytes = to.bytesDistanceFrom(from);
799 if (numBytes >= 0)
800 {
801 StringIterator fromEnd = from;
802 fromEnd.setToEnd();
803 if (fromEnd.bytesDistanceFrom(to) >= 0) // If current iterator of to is inside from range
804 return StringView({from.it, static_cast<size_t>(numBytes)}, false, StringIterator::getEncoding());
805 }
806 return StringView(); // TODO: Make StringView::fromIterators return bool to make it fallible
807}
808
809template <typename StringIterator>
811{
812 StringIterator endIt = it;
813 endIt.setToEnd();
814 const size_t numBytes = static_cast<size_t>(endIt.bytesDistanceFrom(it));
815 return StringView({it.it, numBytes}, false, StringIterator::getEncoding());
816}
817
818template <typename StringIterator>
819constexpr SC::StringView SC::StringView::fromIteratorFromStart(StringIterator it)
820{
821 StringIterator start = it;
822 start.setToStart();
823 const size_t numBytes = static_cast<size_t>(it.bytesDistanceFrom(start));
824 return StringView({start.it, numBytes}, false, StringIterator::getEncoding());
825}
826
828{
829 if (start < sizeInBytes())
830 return sliceStartLengthBytes(start, sizeInBytes() - start);
831 SC_ASSERT_RELEASE(start < sizeInBytes());
832 return StringView({text, 0}, false, getEncoding());
833}
834
835constexpr SC::StringView SC::StringView::sliceStartEndBytes(size_t start, size_t end) const
836{
837 if (end >= start)
838 return sliceStartLengthBytes(start, end - start);
839 SC_ASSERT_RELEASE(end >= start);
840 return StringView({text, 0}, false, getEncoding());
841}
842
843constexpr SC::StringView SC::StringView::sliceStartLengthBytes(size_t start, size_t length) const
844{
845 if (start + length > sizeInBytes())
846 {
847 SC_ASSERT_RELEASE(start + length > sizeInBytes());
848 return StringView({text, 0}, false, getEncoding());
849 }
850 return StringView({text + start, length}, hasNullTerm and (start + length == sizeInBytes()), getEncoding());
851}
#define SC_COMPILER_EXPORT
Macro for symbol visibility in non-MSVC compilers.
Definition Compiler.h:78
#define SC_ASSERT_RELEASE(e)
Assert expression e to be true.
Definition Assert.h:66
unsigned long size_t
Platform independent unsigned size type.
Definition PrimitiveTypes.h:56
signed long ssize_t
Platform independent signed size type.
Definition PrimitiveTypes.h:57
int int32_t
Platform independent (4) bytes signed int.
Definition PrimitiveTypes.h:46
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition StringIterator.h:20
constexpr uint32_t StringEncodingGetSize(StringEncoding encoding)
Returns the number of bytes to represent an utf unit in the given encoding.
Definition StringIterator.h:29
uint32_t StringCodePoint
UTF code point (32 bit)
Definition StringIterator.h:14
View over a contiguous sequence of items (pointer + size in elements).
Definition Span.h:29
static Span< Type > reinterpret_bytes(VoidType *rawMemory, SizeType sizeInBytes)
Construct a span reinterpreting raw memory (void* or const void*) to Type or const Type
Definition Span.h:86
Algorithms operating on strings (glob / wildcard).
Definition StringView.h:617
A string iterator for ASCII strings.
Definition StringIterator.h:231
A string iterator for UTF16 strings.
Definition StringIterator.h:251
A string iterator for UTF8 strings.
Definition StringIterator.h:270
A position inside a fixed range [start, end) of UTF code points.
Definition StringIterator.h:50
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition StringIterator.h:397
constexpr void setToEnd()
Set current position to end of iterator range.
Definition StringIterator.h:60
A read-only view over a string (to avoid including Strings library)
Definition StringViewData.h:31
constexpr StringViewData(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringViewData.h:35
static constexpr StringViewData fromNullTerminated(const char *text, StringEncoding encoding)
Constructs a StringView from a null terminated string.
Definition StringViewData.h:48
constexpr StringEncoding getEncoding() const
Get encoding of this StringView.
Definition StringViewData.h:73
Splits a StringView in tokens according to separators.
Definition StringView.h:521
StringViewTokenizer(StringView text)
Build a tokenizer operating on the given text string view.
Definition StringView.h:538
StringView component
Current component that has been tokenized by tokenizeNext.
Definition StringView.h:527
bool isFinished() const
Check if the tokenizer has processed the entire string view passed in the constructor.
bool tokenizeNextLine()
Tokenizes from current position to first newline.
Definition StringView.h:574
StringViewTokenizer & countTokens(Span< const StringCodePoint > separators)
Count the number of tokens that exist in the string view passed in constructor, when split along t...
size_t numSplitsNonEmpty
How many non-empty splits have occurred in current tokenization.
Definition StringView.h:524
StringView processed
Substring of original string passed in constructor processed so far.
Definition StringView.h:528
size_t numSplitsTotal
How many total splits have occurred in current tokenization.
Definition StringView.h:525
bool tokenizeNext(Span< const StringCodePoint > separators, Options options=Options::SkipEmpty)
Splits the string along a list of separators.
StringView remaining
Substring from current position until the end of original text.
Definition StringView.h:529
Options
Definition StringView.h:532
@ IncludeEmpty
If tokenizeNext should also return empty tokens.
Definition StringView.h:533
@ SkipEmpty
If tokenizeNext should NOT return empty tokens.
Definition StringView.h:534
StringCodePoint splittingCharacter
The last splitting character matched in current tokenization.
Definition StringView.h:522
Non-owning view over a range of characters with UTF Encoding.
Definition StringView.h:48
bool startsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView starts with any utf code point in the given span.
Comparison
Result of ordering comparison done by StringView::compare.
Definition StringView.h:89
constexpr bool isNullTerminated() const
Check if StringView is immediately followed by a null termination character.
Definition StringView.h:190
Comparison compare(StringView other) const
Ordering comparison between non-normalized StringView (operates on code points, not on utf graphemes)
constexpr bool operator==(StringView other) const
Compare this StringView with another StringView for equality.
Definition StringView.h:714
constexpr StringView sliceStartEndBytes(size_t start, size_t end) const
Returns a shortened StringView taking a slice from start to end expressed in bytes.
Definition StringView.h:835
constexpr StringIterator getIterator() const
Returns a StringIterator from current StringView.
Definition StringView.h:654
StringView trimEndAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing ending utf code points matching the codePoints span.
bool startsWith(const StringView str) const
Check if StringView starts with another StringView.
static constexpr StringView fromIteratorFromStart(StringIterator it)
Returns a section of a string, from start of StringView to it.
Span< const uint8_t > toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
Obtain a const uint8_t Span from this StringView.
Definition StringView.h:82
bool parseFloat(float &value) const
Try parsing current StringView as a floating point number.
constexpr StringView sliceStartLengthBytes(size_t start, size_t length) const
Returns a shortened StringView taking a slice from start ending at start+length bytes.
Definition StringView.h:843
constexpr bool operator!=(StringView other) const
Compare this StringView with another StringView for inequality.
Definition StringView.h:163
bool containsCodePoint(StringCodePoint c) const
Check if StringView contains given utf code point.
constexpr bool hasCompatibleEncoding(StringView str) const
Check if current StringView has compatible encoding with str.
Definition StringView.h:790
static StringView fromIterators(StringIterator from, StringIterator to)
Returns a StringView from two iterators. The from iterator will be shortened until the start of to.
static constexpr auto withIterators(StringView s1, StringView s2, Func &&func)
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:784
StringView sliceStartEnd(size_t start, size_t end) const
Get slice [start, end) starting at offset start and ending at end (measured in utf code points)
constexpr StringViewData(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringViewData.h:35
bool endsWith(const StringView str) const
Check if StringView ends with another StringView.
StringView sliceStartLength(size_t start, size_t length) const
Get slice [start, start+length] starting at offset start and of length code points.
StringView sliceStart(size_t offset) const
Get slice [offset, end] measured in utf code points.
auto getNullTerminatedNative() const
Directly access the memory of this null terminated-StringView.
Definition StringView.h:703
bool endsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView ends with any utf code point in the given span.
StringView trimWhiteSpaces() const
Returns a shortened StringView without starting/ending utf code points inside {'\r',...
bool parseDouble(double &value) const
Try parsing current StringView as a double precision floating point number.
constexpr bool fullyOverlaps(StringView other, size_t &commonOverlappingPoints) const
Check if this StringView is equal to other StringView (operates on code points, not on utf graphemes)...
Definition StringView.h:753
StringView trimStartAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting utf code points matching the codePoints span.
bool isIntegerNumber() const
If the current view is an integer number, returns true.
bool splitBefore(const StringView stringToMatch, StringView &stringBeforeSplit) const
Returns the part of the string before matching stringToMatch.
StringView sliceEnd(size_t offset) const
Get slice [end-offset, end] measured in utf code points.
constexpr size_t sizeInBytesIncludingTerminator() const
Get size of the StringView in bytes, including null terminator.
Definition StringView.h:765
bool splitAfter(const StringView stringToMatch, StringView &remainingAfterSplit) const
Returns the remaining part of the string after matching stringToMatch.
static StringView fromIteratorUntilEnd(StringIterator it)
Returns a section of a string, from it to end of StringView.
constexpr StringView sliceStartBytes(size_t start) const
Returns a shortened StringView from current cutting the first start bytes.
Definition StringView.h:827
constexpr const char * bytesIncludingTerminator() const
Directly access the memory of this null terminated-StringView.
Definition StringView.h:647
constexpr auto withIterator(Func &&func) const
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:772
constexpr bool isEmpty() const
Check if StringView is empty.
Definition StringView.h:186
bool containsString(const StringView str) const
Check if StringView contains another StringView with compatible encoding.
bool operator<(StringView other) const
Ordering operator for StringView using StringView::compare.
Definition StringView.h:132
constexpr size_t sizeInBytes() const
Get size of the StringView in bytes.
Definition StringView.h:194
bool parseInt32(int32_t &value) const
Try parsing current StringView as a 32 bit integer.
StringView trimAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting and ending utf code points inside the codePoints spa...
bool isFloatingNumber() const
Check if StringView can be parsed as a floating point number.