//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

#if FOUNDATION_FRAMEWORK
internal import _ForSwiftFoundation
#endif

// These provides concrete implementations for String and Substring, enhancing performance over generic StringProtocol.

@available(FoundationPreview 0.4, *)
extension String {
    public func data(using encoding: String.Encoding, allowLossyConversion: Bool = false) -> Data? {
        switch encoding {
        case .utf8:
            return Data(self.utf8)
        case .ascii, .nonLossyASCII:
            if allowLossyConversion {
                let lossyReplacement = (encoding == .ascii) ? 0xFF : UInt8(ascii: "?")
                return Data(capacity: self.utf8.count) {
                    for scalar in self.unicodeScalars {
                        if scalar.isASCII {
                            $0.append(fromContentsOf: scalar.utf8)
                        } else {
                            $0.appendElement(lossyReplacement)
                        }
                    }
                }
            } else {
                var earlyCheckAllASCII = self.utf8.withContiguousStorageIfAvailable {
                    _allASCII($0)
                }
                if let earlyCheckAllASCII, !earlyCheckAllASCII {
                    return nil
                }
                var data = Data(count: self.utf8.count)
                let allASCII = data.withUnsafeMutableBytes {
                    $0.withMemoryRebound(to: UInt8.self) { buffer in
                        _ = buffer.initialize(fromContentsOf: self.utf8)
                        if let earlyCheckAllASCII {
                            return earlyCheckAllASCII
                        } else {
                            return _allASCII(UnsafeBufferPointer(buffer))
                        }
                    }
                }
                return allASCII ? data : nil
            }
        default:
#if FOUNDATION_FRAMEWORK
            // TODO: Implement data(using:allowLossyConversion:) in Swift
            return _ns.data(
                using: encoding.rawValue,
                allowLossyConversion: allowLossyConversion)
#else
            switch encoding {
            case .utf16BigEndian, .utf16LittleEndian:
                // This creates a contiguous storage for Data to simply memcpy, the most efficient way to give it bytes.
                return withUnsafeTemporaryAllocation(of: UInt8.self, capacity: self.utf16.count * 2) { utf16Buffer in
                    _ = utf16Buffer.initialize(from: UTF16ToDataAdaptor(self.utf16, endianness: Endianness(encoding)!))
                    defer { utf16Buffer.deinitialize() }
                    return Data(utf16Buffer)
                }
            case .utf16:
#if _endian(little)
                let data = Data([0xFF, 0xFE])
                let hostEncoding : String.Encoding = .utf16LittleEndian
#else
                let data = Data([0xFE, 0xFF])
                let hostEncoding : String.Encoding  = .utf16BigEndian
#endif
                guard let swapped = self.data(using: hostEncoding, allowLossyConversion: allowLossyConversion) else {
                    return nil
                }
                
                return data + swapped
            case .utf32BigEndian, .utf32LittleEndian:
                // This creates a contiguous storage for Data to simply memcpy, the most efficient way to give it bytes.
                return withUnsafeTemporaryAllocation(of: UInt8.self, capacity: self.unicodeScalars.count * 4) { utf32Buffer in
                    _ = utf32Buffer.initialize(from: UnicodeScalarToDataAdaptor(self.unicodeScalars, endianness: Endianness(encoding)!))
                    defer { utf32Buffer.deinitialize() }
                    return Data(utf32Buffer)
                }
            case .utf32:
#if _endian(little)
                let data = Data([0xFF, 0xFE, 0x00, 0x00])
                let hostEncoding : String.Encoding = .utf32LittleEndian
#else
                let data = Data([0x00, 0x00, 0xFE, 0xFF])
                let hostEncoding : String.Encoding = .utf32BigEndian
#endif
                guard let swapped = self.data(using: hostEncoding, allowLossyConversion: allowLossyConversion) else {
                    return nil
                }
                
                return data + swapped
            default:
                return nil
            }
#endif
        }
    }
}

@available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
extension StringProtocol {
    /// A copy of the string with each word changed to its corresponding
    /// capitalized spelling.
    ///
    /// This property performs the canonical (non-localized) mapping. It is
    /// suitable for programming operations that require stable results not
    /// depending on the current locale.
    ///
    /// A capitalized string is a string with the first character in each word
    /// changed to its corresponding uppercase value, and all remaining
    /// characters set to their corresponding lowercase values. A "word" is any
    /// sequence of characters delimited by spaces, tabs, or line terminators.
    /// Some common word delimiting punctuation isn't considered, so this
    /// property may not generally produce the desired results for multiword
    /// strings. See the `getLineStart(_:end:contentsEnd:for:)` method for
    /// additional information.
    ///
    /// Case transformations aren’t guaranteed to be symmetrical or to produce
    /// strings of the same lengths as the originals.
    @available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
    public var capitalized: String {
        String(self)._capitalized()
    }

#if FOUNDATION_FRAMEWORK
    /// Finds and returns the range in the `String` of the first
    /// character from a given character set found in a given range with
    /// given options.
    @available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
    public func rangeOfCharacter(from aSet: CharacterSet, options mask: String.CompareOptions = [], range aRange: Range<Index>? = nil) -> Range<Index>? {
        var subStr = Substring(self)
        if let aRange {
            subStr = subStr[aRange]
        }
        return subStr._rangeOfCharacter(from: aSet, options: mask)
    }
#endif // FOUNDATION_FRAMEWORK

    /// Returns a `Data` containing a representation of
    /// the `String` encoded using a given encoding.
    @available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
    public func data(using encoding: String.Encoding, allowLossyConversion: Bool = false) -> Data? {
        switch encoding {
        case .utf8:
            return Data(self.utf8)
        default:
#if FOUNDATION_FRAMEWORK
            // TODO: Implement data(using:allowLossyConversion:) in Swift
            return _ns.data(
                using: encoding.rawValue,
                allowLossyConversion: allowLossyConversion)
#else
            // Get a String, use the concrete implementation there
            return String(self).data(using: encoding, allowLossyConversion: allowLossyConversion)
#endif
        }
    }

    /// Returns an array containing substrings from the string that have been
    /// divided by the given separator.
    ///
    /// The substrings in the resulting array appear in the same order as the
    /// original string. Adjacent occurrences of the separator string produce
    /// empty strings in the result. Similarly, if the string begins or ends
    /// with the separator, the first or last substring, respectively, is empty.
    /// The following example shows this behavior:
    ///
    ///     let list1 = "Karin, Carrie, David"
    ///     let items1 = list1.components(separatedBy: ", ")
    ///     // ["Karin", "Carrie", "David"]
    ///
    ///     // Beginning with the separator:
    ///     let list2 = ", Norman, Stanley, Fletcher"
    ///     let items2 = list2.components(separatedBy: ", ")
    ///     // ["", "Norman", "Stanley", "Fletcher"
    ///
    /// If the list has no separators, the array contains only the original
    /// string itself.
    ///
    ///     let name = "Karin"
    ///     let list = name.components(separatedBy: ", ")
    ///     // ["Karin"]
    ///
    /// - Parameter separator: The separator string.
    /// - Returns: An array containing substrings that have been divided from the
    ///   string using `separator`.
    @available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
    public func components<T : StringProtocol>(separatedBy separator: T) -> [String] {
#if FOUNDATION_FRAMEWORK
        if let contiguousSubstring = _asContiguousUTF8Substring(from: startIndex..<endIndex) {
            let options: String.CompareOptions
            if separator == "\n" {
                // 106365366: Some clients intend to separate strings whose line separator is "\r\n" with "\n".
                // Maintain compatibility with `.literal` so that "\n" can match that in "\r\n" on the unicode scalar level.
                options = [.literal]
            } else {
                options = []
            }

            do {
                return try contiguousSubstring._components(separatedBy: Substring(separator), options: options)
            } catch {
                // Otherwise, inputs were unsupported - fallthrough to NSString implementation for compatibility
            }
        }

        return _ns.components(separatedBy: separator._ephemeralString)
#else
        do {
            return try Substring(self)._components(separatedBy: Substring(separator), options: [])
        } catch {
            return [String(self)]
        }
#endif
    }

    /// Returns the range of characters representing the line or lines
    /// containing a given range.
    @available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
    public func lineRange(for range: some RangeExpression<Index>) -> Range<Index> {
        let r = _lineBounds(around: range)
        return r.start ..< r.end
    }

    /// Returns the range of characters representing the
    /// paragraph or paragraphs containing a given range.
    @available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
    public func paragraphRange(for range: some RangeExpression<Index>) -> Range<Index> {
        let r = _paragraphBounds(around: range)
        return r.start ..< r.end
    }
}

extension StringProtocol {
    @inline(never)
    internal func _lineBounds(
        around range: some RangeExpression<Index>
    ) -> (start: Index, end: Index, contentsEnd: Index) {
        // Avoid generic paths in the common case by manually specializing on `String` and
        // `Substring`. Note that we're only ever calling `_lineBounds` on a `Substring`; this is
        // to reduce the code size overhead of having to specialize it multiple times (at a slight
        // cost to runtime performance).
        if let s = _specializingCast(self, to: String.self) {
            let range = s.unicodeScalars._boundaryAlignedRange(range)
            return s[...].utf8._lineBounds(around: range)
        } else if let s = _specializingCast(self, to: Substring.self) {
            let range = s.unicodeScalars._boundaryAlignedRange(range)
            return s.utf8._lineBounds(around: range)
        } else {
            // Unexpected case. `StringProtocol`'s UTF-8 view is not properly constrained, so we
            // need to convert `self` to a Substring and carefully convert indices between the two
            // collections before & after the _lineBounds call.
            let range = self.unicodeScalars._boundaryAlignedRange(range)

            let startUTF8Offset = self.utf8.distance(from: self.startIndex, to: range.lowerBound)
            let utf8Count = self.utf8.distance(from: range.lowerBound, to: range.upperBound)

            let s = Substring(self)
            let start = s.utf8.index(s.startIndex, offsetBy: startUTF8Offset)
            let end = s.utf8.index(start, offsetBy: utf8Count)
            let r = s.utf8._lineBounds(around: start ..< end)

            let resultUTF8Offsets = (
                start: s.utf8.distance(from: s.startIndex, to: r.start),
                end: s.utf8.distance(from: s.startIndex, to: r.end),
                contentsEnd: s.utf8.distance(from: s.startIndex, to: r.contentsEnd))
            return (
                start: self.utf8.index(self.startIndex, offsetBy: resultUTF8Offsets.start),
                end: self.utf8.index(self.startIndex, offsetBy: resultUTF8Offsets.end),
                contentsEnd: self.utf8.index(self.startIndex, offsetBy: resultUTF8Offsets.contentsEnd))
        }
    }

    @inline(never)
    internal func _paragraphBounds(
        around range: some RangeExpression<Index>
    ) -> (start: Index, end: Index, contentsEnd: Index) {
        // Avoid generic paths in the common case by manually specializing on `String` and
        // `Substring`. Note that we're only ever calling `_paragraphBounds` on a `Substring`; this is
        // to reduce the code size overhead of having to specialize it multiple times (at a slight
        // cost to runtime performance).
        if let s = _specializingCast(self, to: String.self) {
            let range = s.unicodeScalars._boundaryAlignedRange(range)
            return s[...].utf8._paragraphBounds(around: range) // Note: We use [...] to get a Substring
        } else if let s = _specializingCast(self, to: Substring.self) {
            let range = s.unicodeScalars._boundaryAlignedRange(range)
            return s.utf8._paragraphBounds(around: range)
        } else {
            // Unexpected case. `StringProtocol`'s UTF-8 view is not properly constrained, so we
            // need to convert `self` to a Substring and carefully convert indices between the two
            // collections before & after the _lineBounds call.
            let range = self.unicodeScalars._boundaryAlignedRange(range)

            let startUTF8Offset = self.utf8.distance(from: self.startIndex, to: range.lowerBound)
            let utf8Count = self.utf8.distance(from: range.lowerBound, to: range.upperBound)

            let s = Substring(self)
            let start = s.utf8.index(s.startIndex, offsetBy: startUTF8Offset)
            let end = s.utf8.index(start, offsetBy: utf8Count)
            let r = s.utf8._paragraphBounds(around: start ..< end)

            let resultUTF8Offsets = (
                start: s.utf8.distance(from: s.startIndex, to: r.start),
                end: s.utf8.distance(from: s.startIndex, to: r.end),
                contentsEnd: s.utf8.distance(from: s.startIndex, to: r.contentsEnd))
            return (
                start: self.utf8.index(self.startIndex, offsetBy: resultUTF8Offsets.start),
                end: self.utf8.index(self.startIndex, offsetBy: resultUTF8Offsets.end),
                contentsEnd: self.utf8.index(self.startIndex, offsetBy: resultUTF8Offsets.contentsEnd))
        }
    }
}