//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

package extension UTF8.CodeUnit {
    static let newline: Self = 0x0A
    static let carriageReturn: Self = 0x0D

    /// The value 0...9 of this byte when it is an ASCII digit (`0x30...0x39`), otherwise `nil`.
    var _numericValue: Int? {
        if self >= 48 && self <= 57 {
            return Int(self - 48)
        }
        return nil
    }

    // Copied from std; see comment in String.swift _uppercaseASCII() and _lowercaseASCII()
    /// Adds `0x20` to the byte when the bit table flags it as an uppercase ASCII letter.
    ///
    /// The table index is computed after masking off the high bit, so the result is only
    /// meaningful for ASCII bytes (`< 0x80`).
    var _lowercased: Self {
        let _uppercaseTable: UInt64 =
            0b0000_0000_0000_0000_0001_1111_1111_1111 &<< 32
        let isUpper = _uppercaseTable &>> UInt64(((self &- 1) & 0b0111_1111) &>> 1)
        let toAdd = (isUpper & 0x1) &<< 5
        return self &+ UInt8(truncatingIfNeeded: toAdd)
    }

    /// Subtracts `0x20` from the byte when the bit table flags it as a lowercase ASCII letter.
    ///
    /// The table index is computed after masking off the high bit, so the result is only
    /// meaningful for ASCII bytes (`< 0x80`).
    var _uppercased: Self {
        let _lowercaseTable: UInt64 =
            0b0001_1111_1111_1111_0000_0000_0000_0000 &<< 32
        let isLower = _lowercaseTable &>> UInt64(((self &- 1) & 0b0111_1111) &>> 1)
        let toSubtract = (isLower & 0x1) &<< 5
        return self &- UInt8(truncatingIfNeeded: toSubtract)
    }
}

// MARK: - _StringCompareOptionsIterable Methods
// Internal protocols to share the implementation for iterating BidirectionalCollections of String family and process their elements according to String.CompareOptions.

/// An element (byte, scalar or character) that can be normalized for option-based comparison.
internal protocol _StringCompareOptionsConvertible : Comparable & Equatable {
    associatedtype IterableType: _StringCompareOptionsIterable
    /// Returns the element rewritten according to the requested options. The result is a
    /// collection because a transformation may yield zero, one or several elements.
    func _transform(toHalfWidth: Bool, stripDiacritics: Bool, caseFolding: Bool) -> IterableType
    /// The numeric value of the element, or `nil` when it has none.
    var intValue: Int? { get }
    /// Whether the element is skipped as an "extend" (combining) element by the
    /// diacritic-insensitive paths below.
    var isExtendCharacter: Bool { get }
}

/// A collection of `_StringCompareOptionsConvertible` elements that the shared comparison and
/// search algorithms below can walk.
internal protocol _StringCompareOptionsIterable : BidirectionalCollection where Element: _StringCompareOptionsConvertible, Element.IterableType.SubSequence == Self.SubSequence, Element == SubSequence.Element {
    init()
    var first: Element? { get }
    func _consumeExtendCharacters(from i: inout Index)
    func consumeNumbers(from i: inout Index, initialValue: Int) -> Int
}

extension _StringCompareOptionsIterable {
    /// Accumulates the run of numeric elements starting at `i` into a decimal value.
    ///
    /// - Parameters:
    ///   - i: On entry, the position to start reading from; on return, the position of the
    ///     first element that was not consumed.
    ///   - initialValue: The value to continue accumulating onto.
    /// - Returns: The accumulated value. Accumulation stops, without consuming the element,
    ///   at the first element without an `intValue` or the first digit that would overflow `Int`.
    func consumeNumbers(from i: inout Index, initialValue: Int) -> Int {
        guard i < endIndex else {
            return initialValue
        }

        var value = initialValue
        while i < endIndex {
            let c = self[i]
            guard let num = c.intValue else {
                break
            }
            // equivalent to `value = value * 10 + num` but considering overflow
            let multiplied = value.multipliedReportingOverflow(by: 10)
            guard !multiplied.overflow else {
                break
            }

            let added = multiplied.partialValue.addingReportingOverflow(num)
            guard !added.overflow else {
                break
            }

            value = added.partialValue
            self.formIndex(after: &i)
        }

        return value
    }

    /// Advances `i` past every consecutive element whose `isExtendCharacter` is `true`.
    func _consumeExtendCharacters(from i: inout Index) {
        while i < endIndex, self[i].isExtendCharacter {
            formIndex(after: &i)
        }
    }

    /// Compares `self` with `other` element by element, normalizing mismatching elements
    /// through `_transform` before deciding that they differ.
    ///
    /// - Parameters:
    ///   - other: The collection to compare against.
    ///   - toHalfWidth: Forwarded to `_transform(toHalfWidth:)`.
    ///   - diacriticsInsensitive: Forwarded to `_transform(stripDiacritics:)`; also makes the
    ///     walk skip extend characters that appear on only one side or trail either side.
    ///   - caseFold: Forwarded to `_transform(caseFolding:)`.
    ///   - numeric: When both sides are at a numeric element, compare the whole digit runs
    ///     by value (see `consumeNumbers`).
    ///   - forceOrdering: Remember the first raw difference and return it if the two sides
    ///     are otherwise equivalent.
    /// - Returns: The ordering of `self` relative to `other`.
    func _compare<S: _StringCompareOptionsIterable>(_ other: S, toHalfWidth: Bool, diacriticsInsensitive: Bool, caseFold: Bool, numeric: Bool, forceOrdering: Bool) -> ComparisonResult where S.Element == Element {

        var idx1 = self.startIndex
        var idx2 = other.startIndex

        var compareResult: ComparisonResult = .orderedSame

        // Pending transformed elements for each side. A non-empty buffer means the side is
        // currently being read from its transformed form rather than from the collection.
        var norm1 = _StringCompareOptionsIterableBuffer<Element.IterableType>()
        var norm2 = _StringCompareOptionsIterableBuffer<S.Element.IterableType>()

        while idx1 < self.endIndex && idx2 < other.endIndex {
            var c1: Element
            var c2: Element
            if norm1.isEmpty {
                c1 = self[idx1]
            } else {
                c1 = norm1.current
                norm1.advance()
            }

            if norm2.isEmpty {
                c2 = other[idx2]
            } else {
                c2 = norm2.current
                norm2.advance()
            }

            if numeric, norm1.isEmpty, norm2.isEmpty, c1.intValue != nil, c2.intValue != nil {
                let value1 = self.consumeNumbers(from: &idx1, initialValue: 0)
                let value2 = other.consumeNumbers(from: &idx2, initialValue: 0)

                if value1 == value2 {
                    if forceOrdering {
                        // Equal values with different consumed lengths: keep the length
                        // difference as the tie-breaker.
                        let dist1 = self.distance(from: startIndex, to: idx1)
                        let dist2 = other.distance(from: other.startIndex, to: idx2)
                        if dist1 != dist2 {
                            compareResult = ComparisonResult(dist1, dist2)
                        }
                    }
                    continue
                } else {
                    return ComparisonResult(value1, value2)
                }
            }

            if diacriticsInsensitive && idx1 > startIndex {
                var str1Skip = false
                var str2Skip = false
                if norm1.isEmpty && c1.isExtendCharacter {
                    c1 = c2
                    str1Skip = true
                }

                if norm2.isEmpty && c2.isExtendCharacter {
                    c2 = c1
                    str2Skip = true
                }

                if str1Skip != str2Skip {
                    // Only one side had an extend character: step the other side back so the
                    // advance at the bottom of the loop leaves it in place.
                    if str1Skip {
                        other.formIndex(before: &idx2)
                    } else {
                        formIndex(before: &idx1)
                    }
                }
            }

            if c1 != c2 {
                if !(toHalfWidth || diacriticsInsensitive || caseFold) {
                    return ComparisonResult(c1, c2)
                }

                if forceOrdering && compareResult == .orderedSame {
                    compareResult = ComparisonResult(c1, c2)
                }

                if norm1.isEmpty {
                    let t1 = c1._transform(toHalfWidth: toHalfWidth, stripDiacritics: diacriticsInsensitive, caseFolding: caseFold)
                    if let first = t1.first {
                        c1 = first
                        norm1 = .init(t1)
                        norm1.advance()
                    }
                }

                if norm1.isEmpty && !norm2.isEmpty {
                    return ComparisonResult(c1, c2)
                }

                if norm2.isEmpty && (norm1.isEmpty || c1 != c2) {
                    let t2 = c2._transform(toHalfWidth: toHalfWidth, stripDiacritics: diacriticsInsensitive, caseFolding: caseFold)
                    if let first = t2.first {
                        c2 = first
                        norm2 = .init(t2)
                        norm2.advance()
                    }

                    if norm2.isEmpty || c1 != c2 {
                        return ComparisonResult(c1, c2)
                    }
                }

                if !norm1.isEmpty && !norm2.isEmpty {
                    while !norm1.isEnd && !norm2.isEnd {
                        if norm1.current != norm2.current {
                            break
                        }
                        norm1.advance()
                        norm2.advance()
                    }

                    if !norm1.isEnd && !norm2.isEnd {
                        return ComparisonResult(norm1.current, norm2.current)
                    }
                }
            }

            if !norm1.isEmpty && norm1.isEnd {
                norm1.clear()
            }

            if !norm2.isEmpty && norm2.isEnd {
                norm2.clear()
            }

            if norm1.isEmpty {
                formIndex(after: &idx1)
            }

            if norm2.isEmpty {
                other.formIndex(after: &idx2)
            }
        }

        // Process the trailing diacritics, if there's any
        if diacriticsInsensitive {
            self._consumeExtendCharacters(from: &idx1)
            other._consumeExtendCharacters(from: &idx2)
        }

        let result = ComparisonResult(stringIndex: idx1, idx2: idx2, endIndex1: endIndex, endIndex2: other.endIndex)
        return result == .orderedSame ? compareResult : result
    }

    /// Searches `self` for `strToFind`, comparing elements after normalizing them through
    /// `_transform` with the given options.
    ///
    /// - Parameters:
    ///   - strToFind: The collection to look for.
    ///   - toHalfWidth: Forwarded to `_transform(toHalfWidth:)`.
    ///   - diacriticsInsensitive: Forwarded to `_transform(stripDiacritics:)`; also lets a
    ///     match absorb extend characters that follow it in `self`.
    ///   - caseFold: Forwarded to `_transform(caseFolding:)`.
    ///   - anchored: Restrict the match to the search start (forwards) or require it to
    ///     reach `endIndex` (backwards).
    ///   - backwards: Walk candidate start positions from the end towards the start.
    /// - Returns: The matched range in `self`, or `nil` when there is no match.
    // NOTE(review): the generic clause was missing from the reviewed copy of this file;
    // `S: BidirectionalCollection` is assumed here — confirm against version control.
    func _range<S: BidirectionalCollection>(of strToFind: S, toHalfWidth: Bool, diacriticsInsensitive: Bool, caseFold: Bool, anchored: Bool, backwards: Bool) -> Range<Index>? where S.Index == Index, S.Element == Element {

        if !toHalfWidth && !diacriticsInsensitive && !caseFold {
            return _range(of: strToFind, anchored: anchored, backwards: backwards)
        }

        // These options may cause the string to change their count
        let lengthVariants = caseFold || diacriticsInsensitive

        var fromLoc: Index
        var toLoc: Index
        if backwards {
            if lengthVariants {
                fromLoc = index(endIndex, offsetBy: -1)
            } else {
                guard let idx = _index(endIndex, backwardsOffsetByCountOf: strToFind) else {
                    return nil
                }
                fromLoc = idx
            }

            toLoc = (anchored && !lengthVariants) ? fromLoc : startIndex
        } else {
            fromLoc = startIndex
            if anchored {
                toLoc = fromLoc
            } else if lengthVariants {
                toLoc = index(endIndex, offsetBy: -1)
            } else {
                guard let idx = _index(endIndex, backwardsOffsetByCountOf: strToFind) else {
                    return nil
                }
                toLoc = idx
            }
        }

        let delta = fromLoc <= toLoc ? 1 : -1
        var result: Range<Index>? = nil

        while true {
            // Outer loop: loops through `self`

            var str1Char: Element
            var str2Char: Element

            var str1Index = fromLoc
            var str2Index = strToFind.startIndex

            var useStrBuf1 = false
            var useStrBuf2 = false

            var strBuf1 = _StringCompareOptionsIterableBuffer<Element.IterableType>()
            var strBuf2 = _StringCompareOptionsIterableBuffer<Element.IterableType>()

            while str2Index < strToFind.endIndex {
                // Inner loop: loops through `strToFind`

                if !useStrBuf1 {
                    if str1Index == endIndex {
                        break
                    }
                    str1Char = self[str1Index]
                } else {
                    str1Char = strBuf1.current
                    strBuf1.advance()
                }

                if !useStrBuf2 {
                    str2Char = strToFind[str2Index]
                } else {
                    str2Char = strBuf2.current
                    strBuf2.advance()
                }

                if str1Char != str2Char {
                    if !useStrBuf1 {
                        let transformed = str1Char._transform(toHalfWidth: toHalfWidth, stripDiacritics: diacriticsInsensitive, caseFolding: caseFold)
                        if let c = transformed.first {
                            str1Char = c
                            strBuf1 = .init(transformed)
                            strBuf1.advance()
                            useStrBuf1 = true
                        }
                    }

                    if !useStrBuf1 && useStrBuf2 {
                        break
                    }

                    if !useStrBuf2 && (!useStrBuf1 || str1Char != str2Char) {
                        let transformed = str2Char._transform(toHalfWidth: toHalfWidth, stripDiacritics: diacriticsInsensitive, caseFolding: caseFold)
                        if let c = transformed.first {
                            str2Char = c
                            strBuf2 = .init(transformed)
                            strBuf2.advance()
                            useStrBuf2 = true
                        }

                        if str1Char != transformed.first {
                            break
                        }
                    }
                }

                if useStrBuf1 && useStrBuf2 {
                    while !strBuf1.isEnd && !strBuf2.isEnd {
                        if strBuf1.current != strBuf2.current {
                            break
                        }
                        strBuf1.advance()
                        strBuf2.advance()
                    }

                    if !strBuf1.isEnd && !strBuf2.isEnd {
                        break
                    }
                }

                if useStrBuf1 && strBuf1.isEnd {
                    useStrBuf1 = false
                }

                if useStrBuf2 && strBuf2.isEnd {
                    useStrBuf2 = false
                }

                if !useStrBuf1 {
                    formIndex(after: &str1Index)
                }

                if !useStrBuf2 {
                    strToFind.formIndex(after: &str2Index)
                }
            }

            if str2Index == strToFind.endIndex {
                // If `self` has extended characters following the lastly matched character, consume these
                var match = true
                if useStrBuf1 {
                    // if strToFind matches the string after transformed (strBuf1), try consuming extended characters from the buffer first
                    match = false
                    if diacriticsInsensitive {
                        strBuf1._consumeExtendCharacters()
                    }

                    if strBuf1.isEnd {
                        formIndex(after: &str1Index)
                        match = true
                    }
                }

                // After using up strBuf1, inspect the rest of original strings in `self`
                if match && diacriticsInsensitive && str1Index < endIndex {
                    _consumeExtendCharacters(from: &str1Index)
                }

                if match {
                    if !(anchored && backwards) || str1Index == endIndex {
                        result = fromLoc..<str1Index
                        // NOTE(review): the reviewed copy of this file is missing the text
                        // from this point up to the return type of `UTF8.CodeUnit._transform`
                        // below. The remainder of this function and the conformance header
                        // are reconstructed; any other declarations that lived in the gap
                        // (for example collection conformances) must be restored from
                        // version control.
                        break
                    }
                }
            }

            if fromLoc == toLoc {
                break
            }

            formIndex(&fromLoc, offsetBy: delta)
        }

        return result
    }
}

extension UTF8.CodeUnit : _StringCompareOptionsConvertible {
    /// Wraps this byte (lowercased via `_lowercased` when `caseFolding` is set) in a
    /// one-byte string and returns its UTF-8 view. `toHalfWidth` and `stripDiacritics` are
    /// not consulted.
    func _transform(toHalfWidth: Bool, stripDiacritics: Bool, caseFolding: Bool) -> String.UTF8View {
        String(unsafeUninitializedCapacity: 1) {
            $0[0] = caseFolding ? self._lowercased : self
            return 1
        }.utf8
    }

    /// The value 0...9 when the byte is an ASCII digit, otherwise `nil`.
    var intValue: Int? {
        // The range test must require BOTH bounds. The previous `>= 48 || <= 57` was true
        // for every byte, reporting all bytes as digits and trapping in `self - 48` for
        // bytes below 0x30. Delegate to the shared helper so there is a single definition.
        return _numericValue
    }

    var isExtendCharacter: Bool {
        // This won't really get called and will be removed in a future PR
        return false
    }
}

extension Character : _StringCompareOptionsConvertible {
    /// Rebuilds the character scalar by scalar, applying the requested transformations.
    /// With `stripDiacritics`, scalars flagged `_isGraphemeExtend` are dropped entirely.
    func _transform(toHalfWidth: Bool, stripDiacritics: Bool, caseFolding: Bool) -> String {
        if isASCII {
            // we only need to handle case folding, in which case is just lower case
            return caseFolding ? lowercased() : String(self)
        }

        var new = ""
        for scalar in unicodeScalars {
            var tmp = scalar
            if toHalfWidth {
                tmp = scalar._toHalfWidth()
            }

            if stripDiacritics {
                if scalar._isGraphemeExtend {
                    // skip this
                    continue
                } else {
                    tmp = tmp._stripDiacritics()
                }
            }

            if caseFolding {
                new += tmp._caseFoldMapping
            } else {
                new += String(tmp)
            }
        }

        return String(new)
    }

    var intValue: Int? {
        return wholeNumberValue
    }

    /// `true` only for non-ASCII characters whose scalars are all `_isGraphemeExtend`.
    var isExtendCharacter: Bool {
        guard !self.isASCII else {
            return false
        }

        return unicodeScalars.allSatisfy { $0._isGraphemeExtend }
    }
}

extension UnicodeScalar : _StringCompareOptionsConvertible {
    /// Applies the requested transformations to the scalar. Returns an empty view when
    /// `stripDiacritics` is set and the (width-adjusted) scalar is `_isGraphemeExtend`.
    func _transform(toHalfWidth: Bool, stripDiacritics: Bool, caseFolding: Bool) -> String.UnicodeScalarView {
        var new = self
        if toHalfWidth {
            new = new._toHalfWidth()
        }

        if stripDiacritics {
            if new._isGraphemeExtend {
                return String.UnicodeScalarView()
            } else {
                new = new._stripDiacritics()
            }
        }

        if caseFolding {
            return new._caseFoldMapping.unicodeScalars
        } else {
            return String(new).unicodeScalars
        }
    }

    /// The scalar's `properties.numericValue` converted with `Int(_:)`, or `nil` when the
    /// scalar has no numeric value.
    var intValue: Int? {
        guard let v = properties.numericValue else {
            return nil
        }
        return Int(v)
    }

    var isExtendCharacter: Bool {
        return _isGraphemeExtend
    }
}

// MARK: - _StringCompareOptionsIterableBuffer

/// A collection of transformed elements together with a read cursor into it.
///
/// `isEmpty` reports whether the underlying storage is empty (no transformed content is
/// pending at all); `isEnd` reports whether the cursor has reached the end of the storage.
internal struct _StringCompareOptionsIterableBuffer<StorageType: _StringCompareOptionsIterable> {
    var _buf: StorageType
    var _index: StorageType.Index

    init() {
        _buf = StorageType()
        _index = _buf.startIndex
    }

    init(_ content: StorageType) {
        _buf = content
        _index = _buf.startIndex
    }

    /// The element under the cursor. Must not be read when `isEnd` is `true`.
    var current: StorageType.Element {
        return _buf[_index]
    }

    mutating func advance() {
        _buf.formIndex(after: &_index)
    }

    var isEnd: Bool {
        return _index == _buf.endIndex
    }

    var isEmpty: Bool {
        return _buf.isEmpty
    }

    /// Moves the cursor past any extend characters at its current position.
    mutating func _consumeExtendCharacters() {
        _buf._consumeExtendCharacters(from: &_index)
    }

    /// Resets the buffer to the empty state.
    mutating func clear() {
        self = .init()
    }
}

// MARK: Comparison Implementations

extension Substring {
    /// Compares `self` with `other` according to `options`, without any locale.
    ///
    /// With `.literal` the comparison walks Unicode scalars, otherwise characters. With
    /// `.forcedOrdering`, operands that compare equal under the options are ordered by a
    /// plain scalar-by-scalar comparison.
    func _unlocalizedCompare(other: Substring, options: String.CompareOptions) -> ComparisonResult {
        if options.isEmpty {
            return ComparisonResult(self, other)
        }

        let diacriticInsensitive = options.contains(.diacriticInsensitive)
        let toHalfWidth = options.contains(.widthInsensitive)
        let caseFold = options.contains(.caseInsensitive)
        let numeric = options.contains(.numeric)
        let forceOrdering = options.contains(.forcedOrdering)

        var result: ComparisonResult
        if options.contains(.literal) {
            // Per documentation, literal means "Performs a byte-for-byte comparison. Differing literal sequences (such as composed character sequences) that would otherwise be considered equivalent are considered not to match." Therefore we're comparing the scalars rather than characters
            result = unicodeScalars._compare(other.unicodeScalars, toHalfWidth: toHalfWidth, diacriticsInsensitive: diacriticInsensitive, caseFold: caseFold, numeric: numeric, forceOrdering: forceOrdering)
        } else {
            result = _compare(other, toHalfWidth: toHalfWidth, diacriticsInsensitive: diacriticInsensitive, caseFold: caseFold, numeric: numeric, forceOrdering: forceOrdering)
        }

        if result == .orderedSame && forceOrdering {
            result = unicodeScalars._compare(other.unicodeScalars)
        }

        return result
    }

#if FOUNDATION_FRAMEWORK
    /// Finds the first (or, with `.backwards`, last) scalar contained in `set`; only the
    /// `.anchored` and `.backwards` options are consulted.
    func _rangeOfCharacter(from set: CharacterSet, options: String.CompareOptions) -> Range<Index>? {
        guard !isEmpty else { return nil }

        return unicodeScalars._rangeOfCharacter(anchored: options.contains(.anchored), backwards: options.contains(.backwards), matchingPredicate: set.contains)
    }
#endif

    /// Finds the first (or, with `.backwards`, last) scalar contained in `set`; only the
    /// `.anchored` and `.backwards` options are consulted.
    func _rangeOfCharacter(from set: BuiltInUnicodeScalarSet, options: String.CompareOptions) -> Range<Index>? {
        guard !isEmpty else { return nil }

        return unicodeScalars._rangeOfCharacter(anchored: options.contains(.anchored), backwards: options.contains(.backwards), matchingPredicate: set.contains)
    }

    /// Finds `strToFind` in `self` according to `options`.
    ///
    /// - Returns: The matched range, or `nil` when nothing matches or (outside the regex
    ///   path) either operand is empty.
    /// - Throws: Only on the `.regularExpression` path, when obtaining the regex fails.
    func _range(of strToFind: Substring, options: String.CompareOptions) throws -> Range<Index>? {
#if !NO_REGEX
        if options.contains(.regularExpression) {
            guard let regex = try RegexPatternCache.cache.regex(for: String(strToFind), caseInsensitive: options.contains(.caseInsensitive)) else {
                return nil
            }

            if options.contains(.anchored) {
                guard let match = prefixMatch(of: regex) else { return nil }
                return match.range
            } else {
                guard let match = firstMatch(of: regex) else { return nil }
                return match.range
            }
        }
#endif

        guard !isEmpty, !strToFind.isEmpty else {
            return nil
        }

        let toHalfWidth = options.contains(.widthInsensitive)
        let diacriticsInsensitive = options.contains(.diacriticInsensitive)
        let caseFold = options.contains(.caseInsensitive)
        let anchored = options.contains(.anchored)
        let backwards = options.contains(.backwards)

        let result: Range<Index>?
        if options.contains(.literal) {
            result = unicodeScalars._range(of: strToFind.unicodeScalars, toHalfWidth: toHalfWidth, diacriticsInsensitive: diacriticsInsensitive, caseFold: caseFold, anchored: anchored, backwards: backwards)
        } else {
            result = _range(of: strToFind, toHalfWidth: toHalfWidth, diacriticsInsensitive: diacriticsInsensitive, caseFold: caseFold, anchored: anchored, backwards: backwards)
        }

        return result
    }

    /// Splits `self` around every occurrence of `separator` and returns the pieces as strings.
    func _components(separatedBy separator: Substring, options: String.CompareOptions = []) throws -> [String] {
        var result = [String]()
        try _enumerateComponents(separatedBy: separator, options: options) { substr, _ in
            result.append(String(substr))
        }
        return result
    }

    // Only throws when using `.regularExpression` option
    /// Calls `block` with each piece of `self` between occurrences of `separator`; the final
    /// call (the remainder after the last separator) passes `true` for `isLastComponent`.
    package func _enumerateComponents(separatedBy separator: Substring, options: String.CompareOptions, withBlock block: (_ component: Substring, _ isLastComponent: Bool) -> ()) throws {
        var searchStart = startIndex
        while searchStart < endIndex {
            let r = try self[searchStart...]._range(of: separator, options: options)
            // An empty match would never advance `searchStart`, so treat it like "not found".
            guard let r, !r.isEmpty else {
                break
            }

            block(self[searchStart ..< r.lowerBound], false)
            searchStart = r.upperBound
        }

        // NOTE(review): the reviewed copy of this file is missing the text from the upper
        // bound of the slice below to the return type of the scalar-view `_compare`; the
        // remainder of this function and the following extension header are reconstructed —
        // confirm against version control.
        block(self[searchStart..<endIndex], true)
    }
}

extension Substring.UnicodeScalarView {
    /// Orders two scalar views by their first differing scalar, or by length when one is a
    /// prefix of the other.
    func _compare(_ other: Substring.UnicodeScalarView) -> ComparisonResult {
        var idx1 = startIndex
        var idx2 = other.startIndex

        var scalar1: Unicode.Scalar
        var scalar2: Unicode.Scalar
        while idx1 < endIndex && idx2 < other.endIndex {
            scalar1 = self[idx1]
            scalar2 = other[idx2]

            if scalar1 == scalar2 {
                self.formIndex(after: &idx1)
                other.formIndex(after: &idx2)
                continue
            } else {
                return ComparisonResult(scalar1, scalar2)
            }
        }

        return ComparisonResult(stringIndex: idx1, idx2: idx2, endIndex1: endIndex, endIndex2: other.endIndex)
    }

    /// Finds a single scalar satisfying `predicate`.
    ///
    /// - Parameters:
    ///   - anchored: Test only the first scalar (or the last one when `backwards`).
    ///   - backwards: Scan from the last scalar towards the first.
    ///   - predicate: The membership test.
    /// - Returns: The one-scalar range of the first hit in scan order, or `nil`.
    func _rangeOfCharacter(anchored: Bool, backwards: Bool, matchingPredicate predicate: (Unicode.Scalar) -> Bool) -> Range<Index>? {
        guard !isEmpty else { return nil }

        let fromLoc: String.Index
        let toLoc: String.Index
        let step: Int
        if backwards {
            fromLoc = index(before: endIndex)
            toLoc = anchored ? fromLoc : startIndex
            step = -1
        } else {
            fromLoc = startIndex
            toLoc = anchored ? fromLoc : index(before: endIndex)
            step = 1
        }

        var done = false
        var found = false

        var idx = fromLoc
        while !done {
            let ch = self[idx]
            if predicate(ch) {
                done = true
                found = true
            } else if idx == toLoc {
                done = true
            } else {
                formIndex(&idx, offsetBy: step)
            }
        }

        guard found else { return nil }
        // NOTE(review): the reviewed copy of this file is missing the text from the upper
        // bound below to the parameter list of `ComparisonResult.init(stringIndex:...)`; the
        // end of this function and the following extension header are reconstructed —
        // confirm against version control.
        return idx..<index(after: idx)
    }
}

extension ComparisonResult {
    /// Derives an ordering from how far two walks got: `.orderedSame` when both reached
    /// their end, `.orderedAscending` when only the first did, `.orderedDescending` otherwise.
    // NOTE(review): the generic constraint was missing from the reviewed copy; `Comparable`
    // is assumed (only `==` is used in the body) — confirm against version control.
    init<Index: Comparable>(stringIndex idx1: Index, idx2: Index, endIndex1: Index, endIndex2: Index) {
        if idx1 == endIndex1 && idx2 == endIndex2 {
            self = .orderedSame
        } else if idx1 == endIndex1 {
            self = .orderedAscending
        } else {
            self = .orderedDescending
        }
    }

    /// Maps the `<` / `>` relation of two values onto a `ComparisonResult`.
    init<T: Comparable>(_ t1: T, _ t2: T) {
        if t1 < t2 {
            self = .orderedAscending
        } else if t1 > t2 {
            self = .orderedDescending
        } else {
            self = .orderedSame
        }
    }
}

// Borrowed from stdlib
/// Returns `true` when no byte in `input` has its high bit set (an empty buffer is ASCII).
///
/// Bytes are checked one at a time until the address is word-aligned, then a word at a
/// time, then one at a time for the remaining tail.
internal func _allASCII(_ input: UnsafeBufferPointer<UInt8>) -> Bool {
    if input.isEmpty { return true }

    let ptr = input.baseAddress.unsafelyUnwrapped
    var i = 0
    let count = input.count
    let stride = MemoryLayout<UInt>.stride
    let address = Int(bitPattern: ptr)

    let wordASCIIMask = UInt(truncatingIfNeeded: 0x8080_8080_8080_8080 as UInt64)
    let byteASCIIMask = UInt8(truncatingIfNeeded: wordASCIIMask)

    // Unaligned head.
    while (address &+ i) % stride != 0 && i < count {
        guard ptr[i] & byteASCIIMask == 0 else { return false }
        i &+= 1
    }

    // Aligned body, one machine word per iteration.
    while (i &+ stride) <= count {
        let word: UInt = UnsafePointer(bitPattern: address &+ i).unsafelyUnwrapped.pointee
        guard word & wordASCIIMask == 0 else { return false }
        i &+= stride
    }

    // Tail shorter than one word.
    while i < count {
        guard ptr[i] & byteASCIIMask == 0 else { return false }
        i &+= 1
    }
    return true
}