Sane C++ Libraries
C++ Platform Abstraction Libraries
Loading...
Searching...
No Matches
StringView.h
1// Copyright (c) Stefano Cristiano
2// SPDX-License-Identifier: MIT
3#pragma once
4#include "../Foundation/Span.h"
5#include "../Strings/StringIterator.h"
6
7namespace SC
8{
9struct SC_COMPILER_EXPORT StringView;
10struct SC_COMPILER_EXPORT StringViewTokenizer;
11struct SC_COMPILER_EXPORT StringAlgorithms;
12
13} // namespace SC
14
17
20
44
46{
/// Construct an empty StringView by delegating to the default StringSpan constructor.
47 StringView() : StringSpan() {}
48
50
/// Construct a StringView from a StringSpan (non-explicit, so StringSpan converts implicitly).
/// @param ssv Span copied into the StringSpan base of this view
51 constexpr StringView(StringSpan ssv) : StringSpan(ssv) {}
52
/// Constructs a StringView from a null terminated string.
/// Forwards to StringSpan::fromNullTerminated and converts the resulting StringSpan to a StringView.
/// @param text     The null terminated string
/// @param encoding Encoding forwarded to StringSpan::fromNullTerminated
53 static constexpr StringView fromNullTerminated(const char* text, StringEncoding encoding)
54 {
55 return StringSpan::fromNullTerminated(text, encoding);
56 }
57
58#if SC_PLATFORM_WINDOWS
/// Wide character overload of fromNullTerminated, only compiled when SC_PLATFORM_WINDOWS is set.
/// Forwards to StringSpan::fromNullTerminated and converts the resulting StringSpan to a StringView.
/// @param text     The null terminated wide string
/// @param encoding Encoding forwarded to StringSpan::fromNullTerminated
59 static constexpr StringView fromNullTerminated(const wchar_t* text, StringEncoding encoding)
60 {
61 return StringSpan::fromNullTerminated(text, encoding);
62 }
63#endif
64
72 auto getNullTerminatedNative() const;
73
/// Obtain a const uint8_t Span from this StringView.
/// The span reinterprets the memory at `text` and covers `textSizeInBytes` bytes.
76 Span<const uint8_t> toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
77 {
78 return Span<const uint8_t>::reinterpret_bytes(text, textSizeInBytes);
79 }
80
/// Result of ordering comparison done by StringView::compare.
82 enum class Comparison
83 {
84 Smaller = -1,
85 Equals = 0,
86 Bigger = 1
87 };
88
108 [[nodiscard]] Comparison compare(StringView other) const;
109
/// Ordering operator for StringView using StringView::compare.
/// @return `true` only when compare(other) yields Comparison::Smaller
126 [[nodiscard]] bool operator<(StringView other) const { return compare(other) == Comparison::Smaller; }
127
134 template <typename Func>
135 [[nodiscard]] constexpr auto withIterator(Func&& func) const;
136
145 template <typename Func>
146 [[nodiscard]] static constexpr auto withIterators(StringView s1, StringView s2, Func&& func);
147
151 template <typename StringIterator>
152 constexpr StringIterator getIterator() const;
153
/// Compare this StringView with another StringView for inequality (logical negation of operator==).
157 [[nodiscard]] constexpr bool operator!=(StringView other) const { return not operator==(other); }
158
162 [[nodiscard]] constexpr bool operator==(StringSpan other) const;
163
176 [[nodiscard]] constexpr bool fullyOverlaps(StringView other, size_t& commonOverlappingPoints) const;
177
/// Check if StringView is empty.
/// @return `true` when `text` is nullptr or `textSizeInBytes` is zero
180 [[nodiscard]] constexpr bool isEmpty() const { return text == nullptr or textSizeInBytes == 0; }
181
/// Check if StringView is immediately followed by a null termination character.
/// @return The stored `hasNullTerm` flag
184 [[nodiscard]] constexpr bool isNullTerminated() const { return hasNullTerm; }
185
/// Get size of the StringView in bytes.
/// @return The stored `textSizeInBytes` value
188 [[nodiscard]] constexpr size_t sizeInBytes() const { return textSizeInBytes; }
189
198 [[nodiscard]] bool endsWithAnyOf(Span<const StringCodePoint> codePoints) const;
199
208 [[nodiscard]] bool startsWithAnyOf(Span<const StringCodePoint> codePoints) const;
209
218 [[nodiscard]] bool startsWith(const StringView str) const;
219
228 [[nodiscard]] bool endsWith(const StringView str) const;
229
242 [[nodiscard]] bool containsString(const StringView str) const;
243
256 [[nodiscard]] bool splitAfter(const StringView stringToMatch, StringView& remainingAfterSplit) const;
257
270 [[nodiscard]] bool splitBefore(const StringView stringToMatch, StringView& stringBeforeSplit) const;
271
275 [[nodiscard]] bool containsCodePoint(StringCodePoint c) const;
276
281 [[nodiscard]] constexpr bool hasCompatibleEncoding(StringView str) const;
282
291 template <typename StringIterator>
293 StringEncoding encoding = StringIterator::getEncoding());
294
300 template <typename StringIterator>
301 static StringView fromIteratorUntilEnd(StringIterator it, StringEncoding encoding = StringIterator::getEncoding());
302
308 template <typename StringIterator>
310 StringEncoding encoding = StringIterator::getEncoding());
311
323 [[nodiscard]] StringView sliceStartEnd(size_t start, size_t end) const;
324
336 [[nodiscard]] StringView sliceStartLength(size_t start, size_t length) const;
337
347 [[nodiscard]] StringView sliceStart(size_t offset) const;
348
358 [[nodiscard]] StringView sliceEnd(size_t offset) const;
359
369 [[nodiscard]] StringView trimEndAnyOf(Span<const StringCodePoint> codePoints) const;
370
381
391 [[nodiscard]] StringView trimAnyOf(Span<const StringCodePoint> codePoints) const;
392
401 [[nodiscard]] StringView trimWhiteSpaces() const;
402
404
414 [[nodiscard]] bool isIntegerNumber() const;
415
428 [[nodiscard]] bool isFloatingNumber() const;
429
443 [[nodiscard]] bool parseInt32(int32_t& value) const;
444
458 [[nodiscard]] bool parseFloat(float& value) const;
459
473 [[nodiscard]] bool parseDouble(double& value) const;
474
475 private:
476 template <typename T>
477 struct identity
478 {
479 };
480 template <typename Type>
481 constexpr StringIteratorASCII getIterator(identity<Type>) const;
482 constexpr StringIteratorUTF8 getIterator(identity<StringIteratorUTF8>) const;
483 constexpr StringIteratorUTF16 getIterator(identity<StringIteratorUTF16>) const;
484 template <typename StringIterator1, typename StringIterator2>
485 static constexpr bool equalsIterator(StringIterator1 t1, StringIterator2 t2, size_t& points);
486
487 template <typename StringIterator>
488 constexpr bool equalsIterator(StringView other, size_t& points) const;
489};
490
493{
495
496 size_t numSplitsNonEmpty = 0;
497 size_t numSplitsTotal = 0;
498
502
508
/// Build a tokenizer operating on the given text string view.
/// Both `remaining` and `originalText` are initialized with the full text.
510 StringViewTokenizer(StringView text) : remaining(text), originalText(text) {}
511
525 [[nodiscard]] bool tokenizeNext(Span<const StringCodePoint> separators, Options options = Options::SkipEmpty);
526
/// Tokenizes from current position to first newline.
/// Calls tokenizeNext with '\n' as the only separator and the default options.
/// @return Whatever tokenizeNext returns for that call
542 [[nodiscard]] bool tokenizeNextLine() { return tokenizeNext({'\n'}); }
543
556
558 [[nodiscard]] bool isFinished() const;
559
560 private:
561 StringView originalText; // Original text as passed in the constructor
562};
563
583{
584 [[nodiscard]] static bool matchWildcard(StringView s1, StringView s2);
585
586 private:
587 template <typename StringIterator1, typename StringIterator2>
588 [[nodiscard]] static bool matchWildcardIterator(StringIterator1 pattern, StringIterator2 text);
589};
590
592
593//-----------------------------------------------------------------------------------------------------------------------
594// Implementations Details
595//-----------------------------------------------------------------------------------------------------------------------
596namespace SC
597{
598constexpr SC::StringView operator""_a8(const char* txt, size_t sz)
599{
600 return StringView({txt, sz}, true, StringEncoding::Ascii);
601}
602constexpr StringView operator""_u8(const char* txt, size_t sz)
603{
604 return StringView({txt, sz}, true, StringEncoding::Utf8);
605}
606constexpr StringView operator""_u16(const char* txt, size_t sz)
607{
608 const bool isNullTerminated = sz > 0 and sz % 2 == 1 and txt[sz - 1] == 0;
609 return StringView({txt, isNullTerminated ? sz - 1 : sz}, isNullTerminated, StringEncoding::Utf16);
610}
611} // namespace SC
612
613template <typename StringIterator>
615{
616 // For GCC complaining about specialization in non-namespace scope
617 return getIterator(identity<StringIterator>());
618}
619
620template <typename Type>
621constexpr SC::StringIteratorASCII SC::StringView::getIterator(identity<Type>) const
622{
623 return StringIteratorASCII(text, text + textSizeInBytes);
624}
625constexpr SC::StringIteratorUTF8 SC::StringView::getIterator(identity<StringIteratorUTF8>) const
626{
627 return StringIteratorUTF8(text, text + textSizeInBytes);
628}
629constexpr SC::StringIteratorUTF16 SC::StringView::getIterator(identity<StringIteratorUTF16>) const
630{
631 return StringIteratorUTF16(text, text + textSizeInBytes);
632}
633
634template <typename StringIterator1, typename StringIterator2>
635constexpr bool SC::StringView::equalsIterator(StringIterator1 t1, StringIterator2 t2, size_t& points)
636{
637 StringCodePoint c1 = 0;
638 StringCodePoint c2 = 0;
639 while (t1.advanceRead(c1) and t2.advanceRead(c2))
640 {
641 if (c1 != c2)
642 {
643 return false;
644 }
645 points++;
646 }
647 return t1.isAtEnd() and t2.isAtEnd();
648}
649
650template <typename StringIterator>
651constexpr bool SC::StringView::equalsIterator(StringView other, size_t& points) const
652{
653 auto it = getIterator<StringIterator>();
654 switch (other.getEncoding())
655 {
656 case StringEncoding::Ascii: return equalsIterator(it, other.getIterator<StringIteratorASCII>(), points);
657 case StringEncoding::Utf8: return equalsIterator(it, other.getIterator<StringIteratorUTF8>(), points);
658 case StringEncoding::Utf16: return equalsIterator(it, other.getIterator<StringIteratorUTF16>(), points);
659 }
660 Assert::unreachable();
661}
662
/// @brief Directly access the memory of this null terminated-StringView.
/// Checks through SC_ASSERT_RELEASE that the view is flagged as null terminated and that its
/// encoding is Utf16 when SC_PLATFORM_WINDOWS is set, or Utf8 / Ascii otherwise.
/// @return `text` reinterpreted as `const wchar_t*` when SC_PLATFORM_WINDOWS is set, `text` as is otherwise
663[[nodiscard]] inline auto SC::StringView::getNullTerminatedNative() const
664{
665#if SC_PLATFORM_WINDOWS
666 SC_ASSERT_RELEASE(hasNullTerm && (getEncoding() == StringEncoding::Utf16));
667 return reinterpret_cast<const wchar_t*>(text);
668#else
669 SC_ASSERT_RELEASE(hasNullTerm && (getEncoding() == StringEncoding::Utf8 || getEncoding() == StringEncoding::Ascii));
670 return text;
671#endif
672}
673
674[[nodiscard]] constexpr bool SC::StringView::operator==(StringSpan other) const
675{
676#if defined(__clang__)
677#pragma clang diagnostic push
678#pragma clang diagnostic ignored "-Wunreachable-code"
679#endif
680 if (hasCompatibleEncoding(other))
681 {
682 if (textSizeInBytes != other.textSizeInBytes)
683 return false;
684 if (__builtin_is_constant_evaluated())
685 {
686 auto it1 = text;
687 auto it2 = other.text;
688 auto sz = textSizeInBytes;
689 for (size_t idx = 0; idx < sz; ++idx)
690 if (it1[idx] != it2[idx])
691 return false;
692 }
693 else
694 {
695 if (text == nullptr)
696 {
697 return other.textSizeInBytes == 0;
698 }
699 if (other.text == nullptr)
700 {
701 return textSizeInBytes == 0;
702 }
703 return memcmp(text, other.text, textSizeInBytes) == 0;
704 }
705 }
706 size_t commonOverlappingPoints = 0;
707 return fullyOverlaps(other, commonOverlappingPoints);
708#if defined(__clang__)
709#pragma clang diagnostic pop
710#endif
711}
712
713constexpr bool SC::StringView::fullyOverlaps(StringView other, size_t& commonOverlappingPoints) const
714{
715 commonOverlappingPoints = 0;
716 switch (getEncoding())
717 {
718 case StringEncoding::Ascii: return equalsIterator<StringIteratorASCII>(other, commonOverlappingPoints);
719 case StringEncoding::Utf8: return equalsIterator<StringIteratorUTF8>(other, commonOverlappingPoints);
720 case StringEncoding::Utf16: return equalsIterator<StringIteratorUTF16>(other, commonOverlappingPoints);
721 }
722 Assert::unreachable();
723}
724
725template <typename Func>
726constexpr auto SC::StringView::withIterator(Func&& func) const
727{
728 switch (getEncoding())
729 {
730 case StringEncoding::Ascii: return func(getIterator<StringIteratorASCII>());
731 case StringEncoding::Utf8: return func(getIterator<StringIteratorUTF8>());
732 case StringEncoding::Utf16: return func(getIterator<StringIteratorUTF16>());
733 }
734 Assert::unreachable();
735}
736
737template <typename Func>
738constexpr auto SC::StringView::withIterators(StringView s1, StringView s2, Func&& func)
739{
740 return s1.withIterator([&s2, &func](auto it1)
741 { return s2.withIterator([&it1, &func](auto it2) { return func(it1, it2); }); });
742}
743
745{
746 return StringEncodingAreBinaryCompatible(getEncoding(), str.getEncoding());
747}
748
749template <typename StringIterator>
750inline SC::StringView SC::StringView::fromIterators(StringIterator from, StringIterator to, StringEncoding encoding)
751{
752 const ssize_t numBytes = to.bytesDistanceFrom(from);
753 if (numBytes >= 0)
754 {
755 StringIterator fromEnd = from;
756 fromEnd.setToEnd();
757 if (fromEnd.bytesDistanceFrom(to) >= 0) // If current iterator of to is inside from range
758 return StringView({from.it, static_cast<size_t>(numBytes)}, false, encoding);
759 }
760 return StringView(encoding); // TODO: Make StringView::fromIterators return bool to make it fallible
761}
762
763template <typename StringIterator>
764inline SC::StringView SC::StringView::fromIteratorUntilEnd(StringIterator it, StringEncoding encoding)
765{
766 StringIterator endIt = it;
767 endIt.setToEnd();
768 const size_t numBytes = static_cast<size_t>(endIt.bytesDistanceFrom(it));
769 return StringView({it.it, numBytes}, false, encoding);
770}
771
772template <typename StringIterator>
773constexpr SC::StringView SC::StringView::fromIteratorFromStart(StringIterator it, StringEncoding encoding)
774{
775 StringIterator start = it;
776 start.setToStart();
777 const size_t numBytes = static_cast<size_t>(it.bytesDistanceFrom(start));
778 return StringView({start.it, numBytes}, false, encoding);
779}
#define SC_ASSERT_RELEASE(e)
Assert expression e to be true.
Definition Assert.h:42
signed long ssize_t
Platform independent signed size type.
Definition PrimitiveTypes.h:57
int int32_t
Platform independent (4) bytes signed int.
Definition PrimitiveTypes.h:46
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition StringIterator.h:20
uint32_t StringCodePoint
UTF code point (32 bit)
Definition StringIterator.h:14
View over a contiguous sequence of items (pointer + size in elements).
Definition Span.h:29
static Span< Type > reinterpret_bytes(VoidType *rawMemory, SizeType sizeInBytes)
Construct a span reinterpreting raw memory (void* or const void*) to Type or const Type
Definition Span.h:80
Algorithms operating on strings (glob / wildcard).
Definition StringView.h:583
A string iterator for ASCII strings.
Definition StringIterator.h:233
A string iterator for UTF16 strings.
Definition StringIterator.h:253
A string iterator for UTF8 strings.
Definition StringIterator.h:272
A position inside a fixed range [start, end) of UTF code points.
Definition StringIterator.h:36
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition StringIterator.h:399
constexpr void setToEnd()
Set current position to end of iterator range.
Definition StringIterator.h:46
A read-only view over a string (to avoid including Strings library when parsing is not needed).
Definition StringSpan.h:37
static constexpr StringSpan fromNullTerminated(const char *text, StringEncoding encoding)
Constructs a StringView from a null terminated string.
Definition StringSpan.h:54
constexpr StringEncoding getEncoding() const
Get encoding of this StringView.
Definition StringSpan.h:83
constexpr StringSpan(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringSpan.h:41
Splits a StringView in tokens according to separators.
Definition StringView.h:493
StringViewTokenizer(StringView text)
Build a tokenizer operating on the given text string view.
Definition StringView.h:510
StringView component
Current component that has been tokenized by tokenizeNext.
Definition StringView.h:499
bool isFinished() const
Check if the tokenizer has processed the entire string view passed in the constructor.
bool tokenizeNextLine()
Tokenizes from current position to first newline.
Definition StringView.h:542
StringViewTokenizer & countTokens(Span< const StringCodePoint > separators)
Count the number of tokens that exist in the string view passed in constructor, when splitted along t...
size_t numSplitsNonEmpty
How many non-empty splits have occurred in current tokenization.
Definition StringView.h:496
StringView processed
Substring of original string passed in constructor processed so far.
Definition StringView.h:500
size_t numSplitsTotal
How many total splits have occurred in current tokenization.
Definition StringView.h:497
bool tokenizeNext(Span< const StringCodePoint > separators, Options options=Options::SkipEmpty)
Splits the string along a list of separators.
StringView remaining
Substring from current position until the end of original text.
Definition StringView.h:501
Options
Definition StringView.h:504
@ IncludeEmpty
tokenizeNext should also return empty tokens.
Definition StringView.h:505
@ SkipEmpty
tokenizeNext should NOT return empty tokens.
Definition StringView.h:506
StringCodePoint splittingCharacter
The last splitting character matched in current tokenization.
Definition StringView.h:494
Non-owning view over a range of characters with UTF Encoding.
Definition StringView.h:46
bool startsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView starts with any utf code point in the given span.
Comparison
Result of ordering comparison done by StringView::compare.
Definition StringView.h:83
constexpr bool isNullTerminated() const
Check if StringView is immediately followed by a null termination character.
Definition StringView.h:184
Comparison compare(StringView other) const
Ordering comparison between non-normalized StringView (operates on code points, not on utf graphemes)
constexpr StringIterator getIterator() const
Returns a StringIterator from current StringView.
Definition StringView.h:614
StringView trimEndAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing ending utf code points matching the codePoints span.
bool startsWith(const StringView str) const
Check if StringView starts with another StringView.
static constexpr StringView fromIteratorFromStart(StringIterator it, StringEncoding encoding=StringIterator::getEncoding())
Returns a section of a string, from start of StringView to it.
Span< const uint8_t > toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
Obtain a const uint8_t Span from this StringView.
Definition StringView.h:76
bool parseFloat(float &value) const
Try parsing current StringView as a floating point number.
constexpr bool operator!=(StringView other) const
Compare this StringView with another StringView for inequality.
Definition StringView.h:157
bool containsCodePoint(StringCodePoint c) const
Check if StringView contains given utf code point.
constexpr bool hasCompatibleEncoding(StringView str) const
Check if current StringView has compatible encoding with str.
Definition StringView.h:744
static constexpr auto withIterators(StringView s1, StringView s2, Func &&func)
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:738
constexpr StringSpan(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringSpan.h:41
StringView sliceStartEnd(size_t start, size_t end) const
Get slice [start, end) starting at offset start and ending at end (measured in utf code points)
bool endsWith(const StringView str) const
Check if StringView ends with another StringView.
static StringView fromIteratorUntilEnd(StringIterator it, StringEncoding encoding=StringIterator::getEncoding())
Returns a section of a string, from it to end of StringView.
StringView sliceStartLength(size_t start, size_t length) const
Get slice [start, start+length] starting at offset start and of length code points.
StringView sliceStart(size_t offset) const
Get slice [offset, end] measured in utf code points.
auto getNullTerminatedNative() const
Directly access the memory of this null-terminated StringView.
Definition StringView.h:663
bool endsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView ends with any utf code point in the given span.
StringView trimWhiteSpaces() const
Returns a shortened StringView without starting/ending utf code points inside {'\r',...
bool parseDouble(double &value) const
Try parsing current StringView as a double precision floating point number.
constexpr bool fullyOverlaps(StringView other, size_t &commonOverlappingPoints) const
Check if this StringView is equal to other StringView (operates on code points, not on utf graphemes)...
Definition StringView.h:713
StringView trimStartAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting utf code points matching the codePoints span.
bool isIntegerNumber() const
If the current view is an integer number, returns true.
bool splitBefore(const StringView stringToMatch, StringView &stringBeforeSplit) const
Returns the part of the string before matching stringToMatch.
StringView sliceEnd(size_t offset) const
Get slice [end-offset, end] measured in utf code points.
bool splitAfter(const StringView stringToMatch, StringView &remainingAfterSplit) const
Returns the remaining part of the string after matching stringToMatch.
constexpr auto withIterator(Func &&func) const
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:726
constexpr bool isEmpty() const
Check if StringView is empty.
Definition StringView.h:180
bool containsString(const StringView str) const
Check if StringView contains another StringView.
bool operator<(StringView other) const
Ordering operator for StringView using StringView::compare.
Definition StringView.h:126
constexpr size_t sizeInBytes() const
Get size of the StringView in bytes.
Definition StringView.h:188
bool parseInt32(int32_t &value) const
Try parsing current StringView as a 32 bit integer.
StringView trimAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting and ending utf code points inside the codePoints spa...
bool isFloatingNumber() const
Check if StringView can be parsed as a floating point number.
constexpr bool operator==(StringSpan other) const
Compare this StringView with another StringView for equality.
Definition StringView.h:674
static StringView fromIterators(StringIterator from, StringIterator to, StringEncoding encoding=StringIterator::getEncoding())
Returns a StringView starting at from and ending at to.