//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

// NOTE(review): this copy of the file looks like it went through a lossy extraction.
// Text between angle brackets appears to have been stripped (e.g. `MemoryLayout.size`,
// `-> Range`, the undeclared `T` in `components(separatedBy:)`), and two spots below
// are missing whole spans of code. Each spot is marked with its own NOTE(review);
// restore them from the upstream file rather than guessing.

#if FOUNDATION_FRAMEWORK
internal import _ForSwiftFoundation
#endif

#if canImport(Darwin)
import Darwin
#elseif canImport(Glibc)
import Glibc
#endif

// These provide concrete implementations for String and Substring, enhancing performance over generic StringProtocol.
@available(FoundationPreview 0.4, *)
extension String {
    /// Returns a `Data` containing this string encoded using `encoding`.
    ///
    /// Behavior visible in this copy of the function:
    /// - `.utf8`: the UTF-8 code units are copied directly.
    /// - `.ascii` / `.nonLossyASCII` with `allowLossyConversion == true`: every non-ASCII
    ///   scalar is replaced by a single byte (`0xFF` for `.ascii`, `"?"` otherwise).
    /// - `.ascii` / `.nonLossyASCII` with `allowLossyConversion == false`: returns `nil`
    ///   unless the `_allASCII` check passes.
    /// - `.utf16`, `.utf16BigEndian`, `.utf16LittleEndian`: only `.utf16` gets a BOM.
    ///
    /// - Parameters:
    ///   - encoding: The target encoding.
    ///   - allowLossyConversion: Whether unrepresentable characters may be replaced.
    /// - Returns: The encoded bytes, or `nil` when the conversion is not possible.
    public func data(using encoding: String.Encoding, allowLossyConversion: Bool = false) -> Data? {
        // allowLossyConversion is a no-op for UTF8 and UTF16. For UTF32, we fall back to NSString when lossy conversion is requested on Darwin platforms.
        switch encoding {
        case .utf8:
            return Data(self.utf8)
        case .ascii, .nonLossyASCII:
            if allowLossyConversion {
                // One replacement byte is emitted per non-ASCII scalar, so the output never
                // exceeds `utf8.count` bytes; that count is used as the capacity.
                let lossyReplacement = (encoding == .ascii) ? 0xFF : UInt8(ascii: "?")
                return Data(capacity: self.utf8.count) {
                    for scalar in self.unicodeScalars {
                        if scalar.isASCII {
                            $0.append(fromContentsOf: scalar.utf8)
                        } else {
                            $0.appendElement(lossyReplacement)
                        }
                    }
                }
            } else {
                // If the UTF-8 view exposes contiguous storage, run the ASCII check up front so
                // we can bail out before allocating. `earlyCheckAllASCII` is nil when no
                // contiguous storage is available.
                let earlyCheckAllASCII = self.utf8.withContiguousStorageIfAvailable {
                    _allASCII($0)
                }
                if let earlyCheckAllASCII, !earlyCheckAllASCII {
                    return nil
                }
                var data = Data(count: self.utf8.count)
                let allASCII = data.withUnsafeMutableBytes {
                    $0.withMemoryRebound(to: UInt8.self) { buffer in
                        _ = buffer.initialize(fromContentsOf: self.utf8)
                        if let earlyCheckAllASCII {
                            // Already checked above; no need to scan the bytes again.
                            return earlyCheckAllASCII
                        } else {
                            // No early answer was available, so check the bytes we just copied.
                            return _allASCII(UnsafeBufferPointer(buffer))
                        }
                    }
                }
                return allASCII ? data : nil
            }
        case .utf16BigEndian, .utf16LittleEndian, .utf16:
            let bom: UInt16?
            let swap: Bool

            // `.utf16` keeps host byte order and is prefixed with a BOM. The explicit-endian
            // variants carry no BOM and set `swap` when the host byte order differs.
            if encoding == .utf16 {
                swap = false
                bom = 0xFEFF
            } else if encoding == .utf16BigEndian {
#if _endian(little)
                swap = true
#else
                swap = false
#endif
                bom = nil
            } else if encoding == .utf16LittleEndian {
#if _endian(little)
                swap = false
#else
                swap = true
#endif
                bom = nil
            } else {
                fatalError("Unreachable")
            }

            // Grab this value once, as it requires doing a calculation over String's UTF8 storage
            let inputCount = self.utf16.count

            // The output may have 1 additional UTF16 character, if it has a BOM
            let outputCount = bom == nil ? inputCount : inputCount + 1

            // Allocate enough memory to hold the UTF16 bytes after conversion. We will pass this off to Data.
            // NOTE(review): `MemoryLayout.size` has lost its generic argument in this copy;
            // presumably the element type is UInt16, matching the binding below. Confirm upstream.
            let utf16Pointer = calloc(outputCount, MemoryLayout.size)!.assumingMemoryBound(to: UInt16.self)
            let utf16Buffer = UnsafeMutableBufferPointer(start: utf16Pointer, count: outputCount)

            if let bom {
                // Put the BOM in, then copy the UTF16 bytes to the buffer after it.
                utf16Buffer[0] = bom
                // NOTE(review): the source is truncated in the middle of the next statement.
                // The rest of this function, whatever followed it, and the signature of the
                // method whose body appears below (it ends in `_rangeOfCharacter`) are all
                // missing. Do not edit this region without the upstream file.
                let afterBOMBuffer = UnsafeMutableBufferPointer(rebasing: utf16Buffer[1..? = nil) -> Range? {
        var subStr = Substring(self)
        if let aRange {
            subStr = subStr[aRange]
        }
        return subStr._rangeOfCharacter(from: aSet, options: mask)
    }
#endif // FOUNDATION_FRAMEWORK

    /// Returns a `Data` containing a representation of
    /// the `String` encoded using a given encoding.
    @available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
    public func data(using encoding: String.Encoding, allowLossyConversion: Bool = false) -> Data? {
        switch encoding {
        case .utf8:
            // UTF-8 needs no conversion: copy the code units straight out of the UTF-8 view.
            return Data(self.utf8)
        default:
#if FOUNDATION_FRAMEWORK
            // TODO: Implement data(using:allowLossyConversion:) in Swift
            return _ns.data(
                using: encoding.rawValue,
                allowLossyConversion: allowLossyConversion)
#else
            // Get a String, use the concrete implementation there
            return String(self).data(using: encoding, allowLossyConversion: allowLossyConversion)
#endif
        }
    }

    /// Returns an array containing substrings from the string that have been
    /// divided by the given separator.
    ///
    /// The substrings in the resulting array appear in the same order as the
    /// original string. Adjacent occurrences of the separator string produce
    /// empty strings in the result. Similarly, if the string begins or ends
    /// with the separator, the first or last substring, respectively, is empty.
    /// The following example shows this behavior:
    ///
    ///     let list1 = "Karin, Carrie, David"
    ///     let items1 = list1.components(separatedBy: ", ")
    ///     // ["Karin", "Carrie", "David"]
    ///
    ///     // Beginning with the separator:
    ///     let list2 = ", Norman, Stanley, Fletcher"
    ///     let items2 = list2.components(separatedBy: ", ")
    ///     // ["", "Norman", "Stanley", "Fletcher"]
    ///
    /// If the list has no separators, the array contains only the original
    /// string itself.
    ///
    ///     let name = "Karin"
    ///     let list = name.components(separatedBy: ", ")
    ///     // ["Karin"]
    ///
    /// - Parameter separator: The separator string.
    /// - Returns: An array containing substrings that have been divided from the
    ///   string using `separator`.
    @available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
    public func components(separatedBy separator: T) -> [String] {
#if FOUNDATION_FRAMEWORK
        // NOTE(review): the source is truncated in the middle of the next statement.
        // The rest of `components(separatedBy:)` and the start of the following method are
        // missing. The surviving tail below calls `_lineBounds`, so it is presumably
        // `lineRange(for:)` - confirm against the upstream file.
        if let contiguousSubstring = _asContiguousUTF8Substring(from: startIndex..) -> Range {
        let r = _lineBounds(around: range)
        return r.start ..< r.end
    }

    /// Returns the range of characters representing the
    /// paragraph or paragraphs containing a given range.
    @available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
    public func paragraphRange(for range: some RangeExpression) -> Range {
        // `contentsEnd` is ignored here; the returned range runs from `start` to `end`.
        let r = _paragraphBounds(around: range)
        return r.start ..< r.end
    }
}

extension StringProtocol {
    /// Computes the bounds of the line or lines around `range`.
    ///
    /// The range is first aligned to unicode-scalar boundaries, then the work is forwarded
    /// to the `_lineBounds` implementation on `Substring`'s UTF-8 view.
    ///
    /// - Parameter range: The range to expand.
    /// - Returns: A `(start, end, contentsEnd)` tuple of indices into `self`.
    ///   NOTE(review): `contentsEnd` is presumably the end index excluding the line
    ///   terminator - confirm against the UTF-8 view implementation.
    @inline(never)
    internal func _lineBounds(
        around range: some RangeExpression
    ) -> (start: Index, end: Index, contentsEnd: Index) {
        // Avoid generic paths in the common case by manually specializing on `String` and
        // `Substring`. Note that we're only ever calling `_lineBounds` on a `Substring`; this is
        // to reduce the code size overhead of having to specialize it multiple times (at a slight
        // cost to runtime performance).
        if let s = _specializingCast(self, to: String.self) {
            let range = s.unicodeScalars._boundaryAlignedRange(range)
            return s[...].utf8._lineBounds(around: range)
        } else if let s = _specializingCast(self, to: Substring.self) {
            let range = s.unicodeScalars._boundaryAlignedRange(range)
            return s.utf8._lineBounds(around: range)
        } else {
            // Unexpected case. `StringProtocol`'s UTF-8 view is not properly constrained, so we
            // need to convert `self` to a Substring and carefully convert indices between the two
            // collections before & after the _lineBounds call.
            let range = self.unicodeScalars._boundaryAlignedRange(range)

            // Express the input range as UTF-8 offsets so it can be rebuilt in the Substring.
            let startUTF8Offset = self.utf8.distance(from: self.startIndex, to: range.lowerBound)
            let utf8Count = self.utf8.distance(from: range.lowerBound, to: range.upperBound)

            let s = Substring(self)
            let start = s.utf8.index(s.startIndex, offsetBy: startUTF8Offset)
            let end = s.utf8.index(start, offsetBy: utf8Count)
            let r = s.utf8._lineBounds(around: start ..< end)

            // Turn the Substring's result indices back into UTF-8 offsets, then into
            // indices of `self`.
            let resultUTF8Offsets = (
                start: s.utf8.distance(from: s.startIndex, to: r.start),
                end: s.utf8.distance(from: s.startIndex, to: r.end),
                contentsEnd: s.utf8.distance(from: s.startIndex, to: r.contentsEnd))
            return (
                start: self.utf8.index(self.startIndex, offsetBy: resultUTF8Offsets.start),
                end: self.utf8.index(self.startIndex, offsetBy: resultUTF8Offsets.end),
                contentsEnd: self.utf8.index(self.startIndex, offsetBy: resultUTF8Offsets.contentsEnd))
        }
    }

    /// Computes the bounds of the paragraph or paragraphs around `range`.
    ///
    /// The range is first aligned to unicode-scalar boundaries, then the work is forwarded
    /// to the `_paragraphBounds` implementation on `Substring`'s UTF-8 view.
    ///
    /// - Parameter range: The range to expand.
    /// - Returns: A `(start, end, contentsEnd)` tuple of indices into `self`.
    ///   NOTE(review): `contentsEnd` is presumably the end index excluding the paragraph
    ///   terminator - confirm against the UTF-8 view implementation.
    @inline(never)
    internal func _paragraphBounds(
        around range: some RangeExpression
    ) -> (start: Index, end: Index, contentsEnd: Index) {
        // Avoid generic paths in the common case by manually specializing on `String` and
        // `Substring`. Note that we're only ever calling `_paragraphBounds` on a `Substring`; this is
        // to reduce the code size overhead of having to specialize it multiple times (at a slight
        // cost to runtime performance).
        if let s = _specializingCast(self, to: String.self) {
            let range = s.unicodeScalars._boundaryAlignedRange(range)
            return s[...].utf8._paragraphBounds(around: range) // Note: We use [...] to get a Substring
        } else if let s = _specializingCast(self, to: Substring.self) {
            let range = s.unicodeScalars._boundaryAlignedRange(range)
            return s.utf8._paragraphBounds(around: range)
        } else {
            // Unexpected case. `StringProtocol`'s UTF-8 view is not properly constrained, so we
            // need to convert `self` to a Substring and carefully convert indices between the two
            // collections before & after the _paragraphBounds call.
            let range = self.unicodeScalars._boundaryAlignedRange(range)

            // Express the input range as UTF-8 offsets so it can be rebuilt in the Substring.
            let startUTF8Offset = self.utf8.distance(from: self.startIndex, to: range.lowerBound)
            let utf8Count = self.utf8.distance(from: range.lowerBound, to: range.upperBound)

            let s = Substring(self)
            let start = s.utf8.index(s.startIndex, offsetBy: startUTF8Offset)
            let end = s.utf8.index(start, offsetBy: utf8Count)
            let r = s.utf8._paragraphBounds(around: start ..< end)

            // Turn the Substring's result indices back into UTF-8 offsets, then into
            // indices of `self`.
            let resultUTF8Offsets = (
                start: s.utf8.distance(from: s.startIndex, to: r.start),
                end: s.utf8.distance(from: s.startIndex, to: r.end),
                contentsEnd: s.utf8.distance(from: s.startIndex, to: r.contentsEnd))
            return (
                start: self.utf8.index(self.startIndex, offsetBy: resultUTF8Offsets.start),
                end: self.utf8.index(self.startIndex, offsetBy: resultUTF8Offsets.end),
                contentsEnd: self.utf8.index(self.startIndex, offsetBy: resultUTF8Offsets.contentsEnd))
        }
    }
}