//===----------------------------------------------------------------------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2024 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// #if FOUNDATION_FRAMEWORK internal import _ForSwiftFoundation #endif fileprivate let stringEncodingAttributeName = "com.apple.TextEncoding" @available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *) extension String { /// Returns a `String` initialized by converting given `data` into /// Unicode characters using a given `encoding`. public init?(data: __shared Data, encoding: Encoding) { guard let s = String(bytes: data, encoding: encoding) else { return nil } self = s } /// Creates a new string equivalent to the given bytes interpreted in the /// specified encoding. /// /// - Parameters: /// - bytes: A sequence of bytes to interpret using `encoding`. /// - encoding: The encoding to use to interpret `bytes`. public init?(bytes: __shared S, encoding: Encoding) where S.Iterator.Element == UInt8 { switch encoding { case .ascii: func makeString(buffer: UnsafeBufferPointer) -> String? { return String(_validating: buffer, as: Unicode.ASCII.self) } if let string = bytes.withContiguousStorageIfAvailable(makeString) ?? Array(bytes).withUnsafeBufferPointer(makeString) { self = string } else { return nil } case .utf8: func makeString(buffer: UnsafeBufferPointer) -> String? { if let string = String._tryFromUTF8(buffer) { return string } return String(_validating: buffer, as: UTF8.self) } if let string = bytes.withContiguousStorageIfAvailable(makeString) ?? 
// NOTE(review): this file appears to have lost its line breaks and every span of text that
// sat between a `<` and a `>`: `init?(bytes: __shared S, ...)` uses `S` with no visible
// generic parameter list, `UnsafeBufferPointer` has no element type, and `contents[..` is cut
// off mid-range (twice). Presumably extraction damage; restore the missing text from the
// upstream file before building. TODO confirm.
//
// What the visible parts of `init?(data:encoding:)` / `init?(bytes:encoding:)` do:
//  - `init?(data:encoding:)` forwards to `init?(bytes:encoding:)` and fails when that fails.
//  - `.ascii`: validates through `String(_validating:as:)` with `Unicode.ASCII`.
//  - `.utf8`: tries `String._tryFromUTF8` first, then `String(_validating:as:)` with `UTF8`.
//    Both cases use `withContiguousStorageIfAvailable` and otherwise copy the bytes into an `Array`.
//  - UTF-16 family: wraps the bytes in `UTF16EndianAdaptor` (endianness from `Endianness(encoding)`),
//    transcodes to UTF-8 into a temporary allocation of `buffer.count * 3` code units, and yields
//    nil when `transcode` reports an error. The UTF-32 code that is still visible has the same
//    shape with `UTF32EndianAdaptor`.
//  - Remaining visible code: builds an `NSString(bytes:length:encoding:)` and bridges it to `String`;
//    the visible `#else` branch returns nil.
Array(bytes).withUnsafeBufferPointer(makeString) { self = string } else { return nil } case .utf16BigEndian, .utf16LittleEndian, .utf16: // See also the package extension String?(_utf16:), which does something similar to this without the swapping of big/little. let e = Endianness(encoding) let maybe = bytes.withContiguousStorageIfAvailable { buffer -> String? in withUnsafeTemporaryAllocation(of: UTF8.CodeUnit.self, capacity: buffer.count * 3) { contents in let s = UTF16EndianAdaptor(buffer, endianness: e) var count = 0 let error = transcode(s.makeIterator(), from: UTF16.self, to: UTF8.self, stoppingOnError: true) { codeUnit in contents[count] = codeUnit count += 1 } guard !error else { return nil } // Unfortunately no way to skip the validation inside String at this time return String._tryFromUTF8(UnsafeBufferPointer(rebasing: contents[.. String? in withUnsafeTemporaryAllocation(of: UTF8.CodeUnit.self, capacity: buffer.count * 3) { contents in let s = UTF32EndianAdaptor(buffer, endianness: e) var count = 0 let error = transcode(s.makeIterator(), from: UTF32.self, to: UTF8.self, stoppingOnError: true) { codeUnit in contents[count] = codeUnit count += 1 } guard !error else { return nil } // Unfortunately no way to skip the validation inside String at this time return String._tryFromUTF8(UnsafeBufferPointer(rebasing: contents[..) -> String? { if let ns = NSString(bytes: bytes.baseAddress.unsafelyUnwrapped, length: bytes.count, encoding: encoding.rawValue) { return String._unconditionallyBridgeFromObjectiveC(ns) } else { return nil } } if let string = (bytes.withContiguousStorageIfAvailable(makeNSString) ?? Array(bytes).withUnsafeBufferPointer(makeNSString)) { self = string } else { return nil } #else return nil #endif } } #if !NO_FILESYSTEM /// Produces a string created by reading data from the file at a given path interpreted using a given encoding. 
///
/// - Parameters:
///   - path: Path of the file to read.
///   - enc: Encoding used to interpret the file's bytes.
/// - Throws: Any error raised while reading the file, or
///   `CocoaError(.fileReadCorruptFile)` when the bytes cannot be decoded as `enc`.
public init(contentsOfFile path: __shared String, encoding enc: Encoding) throws {
    let fileBytes = try Data(contentsOfFile: path)
    if let decoded = String(data: fileBytes, encoding: enc) {
        self = decoded
    } else {
        throw CocoaError(.fileReadCorruptFile)
    }
}

/// Creates a string by loading the bytes at `url` and decoding them with `enc`.
///
/// - Parameters:
///   - url: Location of the data to read.
///   - enc: Encoding used to interpret the loaded bytes.
/// - Throws: Any error raised while loading the data, or
///   `CocoaError(.fileReadCorruptFile)` when the bytes cannot be decoded as `enc`.
public init(contentsOf url: __shared URL, encoding enc: Encoding) throws {
    let loadedBytes = try Data(contentsOf: url)
    if let decoded = String(data: loadedBytes, encoding: enc) {
        self = decoded
    } else {
        throw CocoaError(.fileReadCorruptFile)
    }
}

/// Creates a string from the file at `path` and reports, through
/// `usedEncoding`, which encoding was used to interpret it.
///
/// - Parameters:
///   - path: Path of the file to read.
///   - usedEncoding: Receives the encoding that was used to decode the file.
/// - Throws: Whatever `init(contentsOfFileOrPath:usedEncoding:)` throws.
public init(contentsOfFile path: __shared String, usedEncoding: inout Encoding) throws {
    try self.init(contentsOfFileOrPath: .path(path), usedEncoding: &usedEncoding)
}

/// Produces a string created by reading data from a given URL and returns by reference the encoding used to interpret the data.
///
/// - Parameters:
///   - url: Location of the data to read.
///   - usedEncoding: Receives the encoding that was used to decode the data.
/// - Throws: Whatever `init(contentsOfFileOrPath:usedEncoding:)` throws.
public init(contentsOf url: __shared URL, usedEncoding: inout Encoding) throws {
    self = try String(contentsOfFileOrPath: .url(url), usedEncoding: &usedEncoding)
}

/// Shared implementation of the `usedEncoding` initializers.
///
/// Reads the file together with its `stringEncodingAttributeName` extended
/// attribute. When that attribute is present and parses to an encoding, the
/// data is decoded with exactly that encoding (there is no fallback to
/// sniffing if decoding fails). Otherwise the encoding is guessed by
/// `init?(dataOfUnknownEncoding:usedEncoding:)`.
///
/// - Parameters:
///   - path: The file to read, given as a path or a URL.
///   - usedEncoding: Receives the encoding that was used; only written on success.
/// - Throws: Any error from `readDataFromFile`, or
///   `CocoaError(.fileReadCorruptFile)` when the data cannot be decoded.
internal init(contentsOfFileOrPath path: PathOrURL, usedEncoding: inout Encoding) throws {
    var attrs: [String : Data] = [:]
    let data = try readDataFromFile(path: path, reportProgress: false, maxLength: nil, options: [], attributesToRead: [stringEncodingAttributeName], attributes: &attrs)
    if let encodingAttributeData = attrs[stringEncodingAttributeName], let extendedAttributeEncoding = encodingFromDataForExtendedAttribute(encodingAttributeData) {
        guard let str = String(data: data, encoding: extendedAttributeEncoding) else {
            throw CocoaError(.fileReadCorruptFile)
        }

        usedEncoding = extendedAttributeEncoding
        self = str
    } else {
        guard let str = String(dataOfUnknownEncoding: data, usedEncoding: &usedEncoding) else {
            throw CocoaError(.fileReadCorruptFile)
        }
        self = str
    }
}
#endif
}

extension String {
    /// Creates a string from `data` whose encoding is unknown, guessing the
    /// encoding from a leading byte-order mark.
    ///
    /// Detection order:
    /// 1. `FF FE 00 00` or `00 00 FE FF` selects `.utf32`.
    /// 2. An even byte count starting with `FE FF` or `FF FE` selects `.unicode`.
    /// 3. Everything else (including empty and one-byte data) selects `.utf8`.
    ///
    /// - Parameters:
    ///   - data: The bytes to decode.
    ///   - usedEncoding: Receives the selected encoding; only written when
    ///     decoding succeeds.
    /// - Returns: `nil` when `data` cannot be decoded with the selected encoding.
    internal init?(dataOfUnknownEncoding data: Data, usedEncoding: inout Encoding) {
        // Copy the (at most four) leading bytes into a zero-based array.
        // Subscripting `data` with literal offsets is only correct when
        // `data.startIndex == 0`, which is not guaranteed for slices, and the
        // previous big-endian UTF-32 test read offsets 3 and 4 instead of 2 and 3
        // (out of bounds for four-byte input, and never matching a real BOM).
        let head = [UInt8](data.prefix(4))

        let encoding: Encoding
        if head == [0xFF, 0xFE, 0x00, 0x00] || head == [0x00, 0x00, 0xFE, 0xFF] {
            // Looks like UTF32. Must be tested before UTF-16, because the
            // little-endian UTF-32 BOM begins with the little-endian UTF-16 BOM.
            encoding = .utf32
        } else if data.count.isMultiple(of: 2) && (head.starts(with: [0xFE, 0xFF]) || head.starts(with: [0xFF, 0xFE])) {
            // Looks like Unicode
            encoding = .unicode
        } else {
            // Fallback (also covers empty and single-byte data)
            encoding = .utf8
        }

        guard let str = String(data: data, encoding: encoding) else {
            return nil
        }

        usedEncoding = encoding
        self = str
    }
}

internal func encodingFromDataForExtendedAttribute(_ value: Data) -> String.Encoding? {
    guard let str = String(data: value, encoding: .utf8) else {
        return nil
    }

    // First look for the integer at the end
    var foundEncoding: String.Encoding?
// The payload parsed here has the shape produced further down this line (presumably by
// `extendedAttributeData(for:)`, whose header is not visible): "<encoding name>;<CF encoding number>",
// or ";<number>" when no encoding name is available.
// NOTE(review): `colonIndex` actually holds the index of the first ";" (a semicolon, not a colon).
// NOTE(review): the text between `str[next..` and `Data? {` appears to be missing, presumably
// because everything between a `<` and a `>` was stripped during extraction. That span would hold
// the rest of this function and the header of the next one. Restore from upstream - TODO confirm.
let colonIndex = str.firstIndex(of: ";") if let colonIndex { let next = str.index(after: colonIndex) if next < str.endIndex { let rest = str[next.. Data? { #if FOUNDATION_FRAMEWORK let cfEncoding = CFStringConvertNSStringEncodingToEncoding(encoding.rawValue) guard cfEncoding != kCFStringEncodingInvalidId else { return nil } let encodingName = CFStringConvertEncodingToIANACharSetName(cfEncoding) #else let cfEncoding : UInt? = switch encoding { case .ascii: 0x0600 case .utf8: 0x08000100 case .utf16: 0x0100 case .utf16BigEndian: 0x10000100 case .utf16LittleEndian: 0x14000100 case .utf32: 0x0c000100 case .utf32BigEndian: 0x18000100 case .utf32LittleEndian: 0x1c000100 default: nil } guard let cfEncoding else { return nil } let encodingName : String? = switch encoding { case .ascii: "us-ascii" case .utf8: "utf-8" case .utf16: "utf-16" case .utf16BigEndian: "utf-16be" case .utf16LittleEndian: "utf-16le" case .utf32: "utf-32" case .utf32BigEndian: "utf-32be" case .utf32LittleEndian: "utf-32le" default: nil } #endif if let encodingName { return "\(encodingName);\(cfEncoding)".data(using: .utf8) } return ";\(cfEncoding)".data(using: .utf8) } // MARK: - Writing @available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *) extension StringProtocol { #if !NO_FILESYSTEM /// Writes the contents of the `String` to a file at a given path using a given encoding. public func write(toFile path: T, atomically useAuxiliaryFile: Bool, encoding enc: String.Encoding) throws { guard let data = data(using: enc) else { throw CocoaError(.fileWriteInapplicableStringEncoding) } let attributes : [String : Data] if let extendedAttributeData = extendedAttributeData(for: enc) { attributes = [stringEncodingAttributeName : extendedAttributeData] } else { attributes = [:] } let options : Data.WritingOptions = useAuxiliaryFile ? 
[.atomic] : [] try writeToFile(path: .path(String(path)), data: data, options: options, attributes: attributes, reportProgress: false) } /// Writes the contents of the `String` to the URL specified by url using the specified encoding. public func write(to url: URL, atomically useAuxiliaryFile: Bool, encoding enc: String.Encoding) throws { guard let data = data(using: enc) else { throw CocoaError(.fileWriteInapplicableStringEncoding) } let attributes : [String : Data] if let extendedAttributeData = extendedAttributeData(for: enc) { attributes = [stringEncodingAttributeName : extendedAttributeData] } else { attributes = [:] } let options : Data.WritingOptions = useAuxiliaryFile ? [.atomic] : [] try writeToFile(path: .url(url), data: data, options: options, attributes: attributes, reportProgress: false) } #endif } // TODO: This is part of the stdlib as of 5.11. This is a copy to support building on previous Swift stdlib versions, but should be replaced with the stdlib one as soon as possible. extension String { internal init?(_validating codeUnits: some Sequence, as encoding: Encoding.Type) { var transcoded: [UTF8.CodeUnit] = [] transcoded.reserveCapacity(codeUnits.underestimatedCount) var isASCII = true let error = transcode( codeUnits.makeIterator(), from: Encoding.self, to: UTF8.self, stoppingOnError: true, into: { uint8 in transcoded.append(uint8) if isASCII && (uint8 & 0x80) == 0x80 { isASCII = false } } ) if error { return nil } let res = transcoded.withUnsafeBufferPointer{ String._tryFromUTF8($0) } if let res { self = res } else { return nil } } }
// Summary of the two `write` methods above: both encode the receiver with `data(using:)` and throw
// `CocoaError(.fileWriteInapplicableStringEncoding)` when that yields nil. They attach the encoding
// attribute under `stringEncodingAttributeName` only when `extendedAttributeData(for:)` returns a
// value, and pass `.atomic` when `useAuxiliaryFile` is true.
// NOTE(review): the two `write` bodies differ only in the `PathOrURL` case passed to `writeToFile`.
// NOTE(review): in `init?(_validating:as:)`, `isASCII` is updated inside the transcode callback but
// is never read afterwards in the visible code; it looks removable - confirm.
// NOTE(review): `write(toFile path: T, ...)` and `init?(_validating:as:)` show no generic parameter
// lists although they use `T` / `Encoding.self` generically; presumably lost with the other
// `<`...`>` text. TODO confirm against upstream.