Sane C++ Libraries
C++ Platform Abstraction Libraries
Loading...
Searching...
No Matches
StringView.h
1// Copyright (c) Stefano Cristiano
2// SPDX-License-Identifier: MIT
3#pragma once
4#include "../Foundation/Span.h"
5#include "../Strings/StringIterator.h"
6#include "StringsExport.h"
7
8namespace SC
9{
10struct SC_STRINGS_EXPORT StringView;
11struct SC_STRINGS_EXPORT StringViewTokenizer;
12struct SC_STRINGS_EXPORT StringAlgorithms;
13
14} // namespace SC
15
18
21
45
47{
48 StringView() : StringSpan() {}
49
51
52 constexpr StringView(StringSpan ssv) : StringSpan(ssv) {}
53
54 static constexpr StringView fromNullTerminated(const char* text, StringEncoding encoding)
55 {
56 return StringSpan::fromNullTerminated(text, encoding);
57 }
58
59#if SC_PLATFORM_WINDOWS
60 static constexpr StringView fromNullTerminated(const wchar_t* text, StringEncoding encoding)
61 {
62 return StringSpan::fromNullTerminated(text, encoding);
63 }
64#endif
65
68 Span<const uint8_t> toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
69 {
70 return Span<const uint8_t>::reinterpret_bytes(text, textSizeInBytes);
71 }
72
79 template <typename Func>
80 [[nodiscard]] constexpr auto withIterator(Func&& func) const;
81
90 template <typename Func>
91 [[nodiscard]] static constexpr auto withIterators(StringView s1, StringView s2, Func&& func);
92
96 template <typename StringIterator>
97 constexpr StringIterator getIterator() const;
98
102 [[nodiscard]] constexpr bool operator!=(StringView other) const { return not operator==(other); }
103
107 [[nodiscard]] constexpr bool operator==(StringView other) const;
108
121 [[nodiscard]] constexpr bool fullyOverlaps(StringView other, size_t& commonOverlappingPoints) const;
122
125 [[nodiscard]] constexpr bool isEmpty() const { return text == nullptr or textSizeInBytes == 0; }
126
129 [[nodiscard]] constexpr bool isNullTerminated() const { return hasNullTerm; }
130
133 [[nodiscard]] constexpr size_t sizeInBytes() const { return textSizeInBytes; }
134
143 [[nodiscard]] bool endsWithAnyOf(Span<const StringCodePoint> codePoints) const;
144
153 [[nodiscard]] bool startsWithAnyOf(Span<const StringCodePoint> codePoints) const;
154
163 [[nodiscard]] bool startsWith(const StringView str) const;
164
167 [[nodiscard]] bool startsWithIgnoreCaseASCII(const StringView str) const;
168
177 [[nodiscard]] bool endsWith(const StringView str) const;
178
191 [[nodiscard]] bool containsString(const StringView str) const;
192
195 [[nodiscard]] bool containsStringIgnoreCaseASCII(const StringView str) const;
196
199 [[nodiscard]] bool equalsIgnoreCaseASCII(const StringView str) const;
200
213 [[nodiscard]] bool splitAfter(const StringView stringToMatch, StringView& remainingAfterSplit) const;
214
227 [[nodiscard]] bool splitBefore(const StringView stringToMatch, StringView& stringBeforeSplit) const;
228
232 [[nodiscard]] bool containsCodePoint(StringCodePoint c) const;
233
238 [[nodiscard]] constexpr bool hasCompatibleEncoding(StringView str) const;
239
248 template <typename StringIterator>
250 StringEncoding encoding = StringIterator::getEncoding());
251
257 template <typename StringIterator>
258 static StringView fromIteratorUntilEnd(StringIterator it, StringEncoding encoding = StringIterator::getEncoding());
259
265 template <typename StringIterator>
267 StringEncoding encoding = StringIterator::getEncoding());
268
280 [[nodiscard]] StringView sliceStartEnd(size_t start, size_t end) const;
281
293 [[nodiscard]] StringView sliceStartLength(size_t start, size_t length) const;
294
304 [[nodiscard]] StringView sliceStart(size_t offset) const;
305
315 [[nodiscard]] StringView sliceEnd(size_t offset) const;
316
326 [[nodiscard]] StringView trimEndAnyOf(Span<const StringCodePoint> codePoints) const;
327
338
348 [[nodiscard]] StringView trimAnyOf(Span<const StringCodePoint> codePoints) const;
349
358 [[nodiscard]] StringView trimWhiteSpaces() const;
359
361
371 [[nodiscard]] bool isIntegerNumber() const;
372
385 [[nodiscard]] bool isFloatingNumber() const;
386
400 [[nodiscard]] bool parseInt32(int32_t& value) const;
401
415 [[nodiscard]] bool parseFloat(float& value) const;
416
430 [[nodiscard]] bool parseDouble(double& value) const;
431
432 private:
433 template <typename T>
434 struct identity
435 {
436 };
437 template <typename Type>
438 constexpr StringIteratorASCII getIterator(identity<Type>) const;
439 constexpr StringIteratorUTF8 getIterator(identity<StringIteratorUTF8>) const;
440 constexpr StringIteratorUTF16 getIterator(identity<StringIteratorUTF16>) const;
441 template <typename StringIterator1, typename StringIterator2>
442 static constexpr bool equalsIterator(StringIterator1 t1, StringIterator2 t2, size_t& points);
443
444 template <typename StringIterator>
445 constexpr bool equalsIterator(StringView other, size_t& points) const;
446};
447
450{
452
453 size_t numSplitsNonEmpty = 0;
454 size_t numSplitsTotal = 0;
455
459
465
467 StringViewTokenizer(StringView text) : remaining(text), originalText(text) {}
468
482 [[nodiscard]] bool tokenizeNext(Span<const StringCodePoint> separators, Options options = Options::SkipEmpty);
483
499 [[nodiscard]] bool tokenizeNextLine() { return tokenizeNext({'\n'}); }
500
513
515 [[nodiscard]] bool isFinished() const;
516
517 private:
518 StringView originalText; // Original text as passed in the constructor
519};
520
540{
541 [[nodiscard]] static bool matchWildcard(StringView s1, StringView s2);
542
543 private:
544 template <typename StringIterator1, typename StringIterator2>
545 [[nodiscard]] static bool matchWildcardIterator(StringIterator1 pattern, StringIterator2 text);
546};
547
549
550//-----------------------------------------------------------------------------------------------------------------------
551// Implementation Details
552//-----------------------------------------------------------------------------------------------------------------------
553namespace SC
554{
555constexpr SC::StringView operator""_a8(const char* txt, size_t sz)
556{
557 return StringView({txt, sz}, true, StringEncoding::Ascii);
558}
559constexpr StringView operator""_u8(const char* txt, size_t sz)
560{
561 return StringView({txt, sz}, true, StringEncoding::Utf8);
562}
563constexpr StringView operator""_u16(const char* txt, size_t sz)
564{
565 const bool isNullTerminated = sz > 0 and sz % 2 == 1 and txt[sz - 1] == 0;
566 return StringView({txt, isNullTerminated ? sz - 1 : sz}, isNullTerminated, StringEncoding::Utf16);
567}
568} // namespace SC
569
570template <typename StringIterator>
572{
573 // For GCC complaining about specialization in non-namespace scope
574 return getIterator(identity<StringIterator>());
575}
576
577template <typename Type>
578constexpr SC::StringIteratorASCII SC::StringView::getIterator(identity<Type>) const
579{
580 return StringIteratorASCII(text, text + textSizeInBytes);
581}
582constexpr SC::StringIteratorUTF8 SC::StringView::getIterator(identity<StringIteratorUTF8>) const
583{
584 return StringIteratorUTF8(text, text + textSizeInBytes);
585}
586constexpr SC::StringIteratorUTF16 SC::StringView::getIterator(identity<StringIteratorUTF16>) const
587{
588 return StringIteratorUTF16(text, text + textSizeInBytes);
589}
590
591template <typename StringIterator1, typename StringIterator2>
592constexpr bool SC::StringView::equalsIterator(StringIterator1 t1, StringIterator2 t2, size_t& points)
593{
594 StringCodePoint c1 = 0;
595 StringCodePoint c2 = 0;
596 while (t1.advanceRead(c1) and t2.advanceRead(c2))
597 {
598 if (c1 != c2)
599 {
600 return false;
601 }
602 points++;
603 }
604 return t1.isAtEnd() and t2.isAtEnd();
605}
606
607template <typename StringIterator>
608constexpr bool SC::StringView::equalsIterator(StringView other, size_t& points) const
609{
610 auto it = getIterator<StringIterator>();
611 switch (other.getEncoding())
612 {
613 case StringEncoding::Ascii: return equalsIterator(it, other.getIterator<StringIteratorASCII>(), points);
614 case StringEncoding::Utf8: return equalsIterator(it, other.getIterator<StringIteratorUTF8>(), points);
615 case StringEncoding::Utf16: return equalsIterator(it, other.getIterator<StringIteratorUTF16>(), points);
616 }
617 Assert::unreachable();
618}
619
620[[nodiscard]] constexpr bool SC::StringView::operator==(StringView other) const
621{
622#if defined(__clang__)
623#pragma clang diagnostic push
624#pragma clang diagnostic ignored "-Wunreachable-code"
625#endif
626 if (__builtin_is_constant_evaluated())
627 {
628 if (not hasCompatibleEncoding(other))
629 return false;
630 auto it1 = text;
631 auto it2 = other.text;
632 auto sz = textSizeInBytes;
633 for (size_t idx = 0; idx < sz; ++idx)
634 if (it1[idx] != it2[idx])
635 return false;
636 return true;
637 }
638 else
639 {
640 return StringSpan::operator==(other);
641 }
642#if defined(__clang__)
643#pragma clang diagnostic pop
644#endif
645}
646
647constexpr bool SC::StringView::fullyOverlaps(StringView other, size_t& commonOverlappingPoints) const
648{
649 commonOverlappingPoints = 0;
650 switch (getEncoding())
651 {
652 case StringEncoding::Ascii: return equalsIterator<StringIteratorASCII>(other, commonOverlappingPoints);
653 case StringEncoding::Utf8: return equalsIterator<StringIteratorUTF8>(other, commonOverlappingPoints);
654 case StringEncoding::Utf16: return equalsIterator<StringIteratorUTF16>(other, commonOverlappingPoints);
655 }
656 Assert::unreachable();
657}
658
659template <typename Func>
660constexpr auto SC::StringView::withIterator(Func&& func) const
661{
662 switch (getEncoding())
663 {
664 case StringEncoding::Ascii: return func(getIterator<StringIteratorASCII>());
665 case StringEncoding::Utf8: return func(getIterator<StringIteratorUTF8>());
666 case StringEncoding::Utf16: return func(getIterator<StringIteratorUTF16>());
667 }
668 Assert::unreachable();
669}
670
671template <typename Func>
672constexpr auto SC::StringView::withIterators(StringView s1, StringView s2, Func&& func)
673{
674 return s1.withIterator([&s2, &func](auto it1)
675 { return s2.withIterator([&it1, &func](auto it2) { return func(it1, it2); }); });
676}
677
679{
680 return StringEncodingAreBinaryCompatible(getEncoding(), str.getEncoding());
681}
682
683template <typename StringIterator>
684inline SC::StringView SC::StringView::fromIterators(StringIterator from, StringIterator to, StringEncoding encoding)
685{
686 const ssize_t numBytes = to.bytesDistanceFrom(from);
687 if (numBytes >= 0)
688 {
689 StringIterator fromEnd = from;
690 fromEnd.setToEnd();
691 if (fromEnd.bytesDistanceFrom(to) >= 0) // If current iterator of to is inside from range
692 return StringView({from.it, static_cast<size_t>(numBytes)}, false, encoding);
693 }
694 return StringView(encoding); // TODO: Make StringView::fromIterators return bool to make it fallible
695}
696
697template <typename StringIterator>
698inline SC::StringView SC::StringView::fromIteratorUntilEnd(StringIterator it, StringEncoding encoding)
699{
700 StringIterator endIt = it;
701 endIt.setToEnd();
702 const size_t numBytes = static_cast<size_t>(endIt.bytesDistanceFrom(it));
703 return StringView({it.it, numBytes}, false, encoding);
704}
705
706template <typename StringIterator>
707constexpr SC::StringView SC::StringView::fromIteratorFromStart(StringIterator it, StringEncoding encoding)
708{
709 StringIterator start = it;
710 start.setToStart();
711 const size_t numBytes = static_cast<size_t>(it.bytesDistanceFrom(start));
712 return StringView({start.it, numBytes}, false, encoding);
713}
decltype(static_cast< char * >(nullptr) - static_cast< char * >(nullptr)) ssize_t
Platform independent signed size type.
Definition PrimitiveTypes.h:46
int int32_t
Platform independent (4) bytes signed int.
Definition PrimitiveTypes.h:37
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition StringIterator.h:21
uint32_t StringCodePoint
UTF code point (32 bit)
Definition StringIterator.h:15
View over a contiguous sequence of items (pointer + size in elements).
Definition Span.h:29
static Span< Type > reinterpret_bytes(VoidType *rawMemory, SizeType sizeInBytes)
Construct a span reinterpreting raw memory (void* or const void*) to Type or const Type
Definition Span.h:81
Algorithms operating on strings (glob / wildcard).
Definition StringView.h:540
A string iterator for ASCII strings.
Definition StringIterator.h:240
A string iterator for UTF16 strings.
Definition StringIterator.h:263
A string iterator for UTF8 strings.
Definition StringIterator.h:281
A position inside a fixed range [start, end) of UTF code points.
Definition StringIterator.h:37
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition StringIterator.h:407
constexpr void setToEnd()
Set current position to end of iterator range.
Definition StringIterator.h:47
A read-only view over a string (to avoid including Strings library when parsing is not needed).
Definition StringSpan.h:37
static constexpr StringSpan fromNullTerminated(const char *text, StringEncoding encoding)
Constructs a StringView from a null terminated string.
Definition StringSpan.h:54
constexpr StringEncoding getEncoding() const
Get encoding of this StringView.
Definition StringSpan.h:98
constexpr StringSpan(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringSpan.h:41
Splits a StringView in tokens according to separators.
Definition StringView.h:450
StringViewTokenizer(StringView text)
Build a tokenizer operating on the given text string view.
Definition StringView.h:467
StringView component
Current component that has been tokenized by tokenizeNext.
Definition StringView.h:456
bool isFinished() const
Check if the tokenizer has processed the entire string view passed in the constructor.
bool tokenizeNextLine()
Tokenizes from current position to first newline.
Definition StringView.h:499
StringViewTokenizer & countTokens(Span< const StringCodePoint > separators)
Count the number of tokens that exist in the string view passed in constructor, when split along t...
size_t numSplitsNonEmpty
How many non-empty splits have occurred in current tokenization.
Definition StringView.h:453
StringView processed
Substring of original string passed in constructor processed so far.
Definition StringView.h:457
size_t numSplitsTotal
How many total splits have occurred in current tokenization.
Definition StringView.h:454
bool tokenizeNext(Span< const StringCodePoint > separators, Options options=Options::SkipEmpty)
Splits the string along a list of separators.
StringView remaining
Substring from current position until the end of original text.
Definition StringView.h:458
Options
Definition StringView.h:461
@ IncludeEmpty
Whether tokenizeNext should also return empty tokens.
Definition StringView.h:462
@ SkipEmpty
Whether tokenizeNext should NOT return empty tokens.
Definition StringView.h:463
StringCodePoint splittingCharacter
The last splitting character matched in current tokenization.
Definition StringView.h:451
Non-owning view over a range of characters with UTF Encoding.
Definition StringView.h:47
bool startsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView starts with any utf code point in the given span.
constexpr bool isNullTerminated() const
Check if StringView is immediately followed by a null termination character.
Definition StringView.h:129
constexpr bool operator==(StringView other) const
Compare this StringView with another StringView for equality.
Definition StringView.h:620
bool equalsIgnoreCaseASCII(const StringView str) const
Check if StringView is equal to another StringView using ASCII-only case-insensitive comparison.
constexpr StringIterator getIterator() const
Returns a StringIterator from current StringView.
Definition StringView.h:571
StringView trimEndAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing ending utf code points matching the codePoints span.
bool startsWith(const StringView str) const
Check if StringView starts with another StringView.
static constexpr StringView fromIteratorFromStart(StringIterator it, StringEncoding encoding=StringIterator::getEncoding())
Returns a section of a string, from start of StringView to it.
bool containsStringIgnoreCaseASCII(const StringView str) const
Check if StringView contains another StringView using ASCII-only case-insensitive comparison.
Span< const uint8_t > toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
Obtain a const uint8_t Span from this StringView.
Definition StringView.h:68
bool parseFloat(float &value) const
Try parsing current StringView as a floating point number.
constexpr bool operator!=(StringView other) const
Compare this StringView with another StringView for inequality.
Definition StringView.h:102
bool containsCodePoint(StringCodePoint c) const
Check if StringView contains given utf code point.
constexpr bool hasCompatibleEncoding(StringView str) const
Check if current StringView has compatible encoding with str.
Definition StringView.h:678
static constexpr auto withIterators(StringView s1, StringView s2, Func &&func)
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:672
constexpr StringSpan(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringSpan.h:41
StringView sliceStartEnd(size_t start, size_t end) const
Get slice [start, end) starting at offset start and ending at end (measured in utf code points)
bool endsWith(const StringView str) const
Check if StringView ends with another StringView.
static StringView fromIteratorUntilEnd(StringIterator it, StringEncoding encoding=StringIterator::getEncoding())
Returns a section of a string, from it to end of StringView.
StringView sliceStartLength(size_t start, size_t length) const
Get slice [start, start+length] starting at offset start and of length code points.
StringView sliceStart(size_t offset) const
Get slice [offset, end] measured in utf code points.
bool endsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView ends with any utf code point in the given span.
StringView trimWhiteSpaces() const
Returns a shortened StringView without starting/ending utf code points inside {'\r',...
bool parseDouble(double &value) const
Try parsing current StringView as a double precision floating point number.
constexpr bool fullyOverlaps(StringView other, size_t &commonOverlappingPoints) const
Check if this StringView is equal to other StringView (operates on code points, not on utf graphemes)...
Definition StringView.h:647
StringView trimStartAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting utf code points matching the codePoints span.
bool isIntegerNumber() const
If the current view is an integer number, returns true.
bool splitBefore(const StringView stringToMatch, StringView &stringBeforeSplit) const
Returns the part of the string before matching stringToMatch.
StringView sliceEnd(size_t offset) const
Get slice [end-offset, end] measured in utf code points.
bool splitAfter(const StringView stringToMatch, StringView &remainingAfterSplit) const
Returns the remaining part of the string after matching stringToMatch.
bool startsWithIgnoreCaseASCII(const StringView str) const
Check if StringView starts with another StringView using ASCII-only case-insensitive comparison.
constexpr auto withIterator(Func &&func) const
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:660
constexpr bool isEmpty() const
Check if StringView is empty.
Definition StringView.h:125
bool containsString(const StringView str) const
Check if StringView contains another StringView.
constexpr size_t sizeInBytes() const
Get size of the StringView in bytes.
Definition StringView.h:133
bool parseInt32(int32_t &value) const
Try parsing current StringView as a 32 bit integer.
StringView trimAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting and ending utf code points inside the codePoints spa...
bool isFloatingNumber() const
Check if StringView can be parsed as a floating point number.
static StringView fromIterators(StringIterator from, StringIterator to, StringEncoding encoding=StringIterator::getEncoding())
Returns a StringView starting at from and ending at to.