//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2023 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#if FOUNDATION_FRAMEWORK
@_spi(_Unicode) import Swift
internal import Foundation_Private.NSString
#endif

#if canImport(Darwin)
import Darwin
#endif

#if os(Windows)
import WinSDK

extension String {
    /// Calls `body` with a pointer to the UTF-16 form of this path after it has been
    /// normalized by `GetFullPathNameW`.
    ///
    /// The pointer refers to a temporary allocation and is only valid for the duration of `body`.
    ///
    /// - Parameter body: The operation to perform on the normalized, wide-character path.
    /// - Returns: Whatever `body` returns.
    /// - Throws: A `CocoaError` with `.fileReadInvalidFileName` when the string is empty, a
    ///   `CocoaError` built from `GetLastError()` when normalization fails, or any error thrown by `body`.
    package func withNTPathRepresentation<Result>(_ body: (UnsafePointer<WCHAR>) throws -> Result) throws -> Result {
        guard !isEmpty else {
            throw CocoaError.errorWithFilePath(.fileReadInvalidFileName, "")
        }

        var iter = self.utf8.makeIterator()
        let bLeadingSlash = if [._slash, ._backslash].contains(iter.next()), iter.next()?.isLetter ?? false, iter.next() == ._colon { true } else { false }

        // Strip the leading `/` on a RFC8089 path (`/[drive-letter]:/...` ). A
        // leading slash indicates a rooted path on the drive for the current
        // working directory.
        return try Substring(self.utf8.dropFirst(bLeadingSlash ? 1 : 0)).withCString(encodedAs: UTF16.self) { pwszPath in
            // 1. Normalize the path first.
            // The first call passes a zero-length buffer so that the required capacity is returned.
            let dwLength: DWORD = GetFullPathNameW(pwszPath, 0, nil, nil)
            return try withUnsafeTemporaryAllocation(of: WCHAR.self, capacity: Int(dwLength)) {
                guard GetFullPathNameW(pwszPath, DWORD($0.count), $0.baseAddress, nil) > 0 else {
                    throw CocoaError.errorWithFilePath(self, win32: GetLastError(), reading: true)
                }

                // 2. Perform the operation on the normalized path.
                return try body($0.baseAddress!)
            }
        }
    }
}
#endif

extension String {
    /// Returns this string with whitespace scalars trimmed by `_trimmingCharacters`.
    ///
    /// A scalar counts as whitespace when its `properties.isWhitespace` is `true`.
    /// (`_trimmingCharacters` is declared elsewhere; presumably it trims both ends — confirm.)
    package func _trimmingWhitespace() -> String {
        if self.isEmpty {
            return ""
        }

        return String(unicodeScalars._trimmingCharacters {
            $0.properties.isWhitespace
        })
    }

    /// Creates a string by transcoding the given UTF-16 code units to UTF-8.
    ///
    /// - Parameter input: The UTF-16 code units to transcode.
    /// - Returns: `nil` when transcoding reports an error or `String._tryFromUTF8` rejects the result.
    package init?(_utf16 input: UnsafeBufferPointer<UInt16>) {
        // Allocate input.count * 3 code units since one UTF-16 code unit may require up to three UTF-8 code units when transcoded
        let str = withUnsafeTemporaryAllocation(of: UTF8.CodeUnit.self, capacity: input.count * 3) { contents in
            var count = 0
            let error = transcode(input.makeIterator(), from: UTF16.self, to: UTF8.self, stoppingOnError: true) { codeUnit in
                contents[count] = codeUnit
                count += 1
            }

            guard !error else {
                return nil as String?
            }

            return String._tryFromUTF8(UnsafeBufferPointer(rebasing: contents[..<count]))
        }

        guard let str else {
            return nil
        }
        self = str
    }

    /// Creates a string from the first `count` UTF-16 code units of `input`.
    ///
    /// NOTE(review): the parameter's buffer type was reconstructed from the `rebasing:` call below;
    /// confirm `UnsafeMutableBufferPointer<UInt16>` against callers.
    ///
    /// - Returns: `nil` when `init?(_utf16:)` fails for the selected prefix.
    package init?(_utf16 input: UnsafeMutableBufferPointer<UInt16>, count: Int) {
        guard let str = String(_utf16: UnsafeBufferPointer(rebasing: input[..<count])) else {
            return nil
        }
        self = str
    }

    /// Creates a string from `count` UTF-16 code units starting at `input`.
    ///
    /// NOTE(review): the pointer's element type was reconstructed from the `_utf16:` initializer
    /// it forwards to; confirm `UnsafePointer<UInt16>` against callers.
    ///
    /// - Returns: `nil` when `init?(_utf16:)` fails for the described buffer.
    package init?(_utf16 input: UnsafePointer<UInt16>, count: Int) {
        guard let str = String(_utf16: UnsafeBufferPointer(start: input, count: count)) else {
            return nil
        }
        self = str
    }

    /// The decomposition flavor to apply when producing decomposed UTF-8 output.
    enum _NormalizationType {
        case canonical
        case hfsPlus

        /// The built-in scalar set that lists the scalars decomposed for this flavor.
        fileprivate var setType: BuiltInUnicodeScalarSet.SetType {
            switch self {
            case .canonical: .canonicalDecomposable
            case .hfsPlus: .hfsPlusDecomposable
            }
        }
    }

    /// Writes the decomposed UTF-8 form of this string into `buffer`.
    ///
    /// - Returns: The value returned by the buffer-level `_decomposed`, or `nil` when it throws.
    private func _decomposed(_ type: String._NormalizationType, into buffer: UnsafeMutableBufferPointer<UInt8>, nullTerminated: Bool = false) -> Int? {
        // `withUTF8` is mutating, so operate on a local copy.
        var copy = self
        return copy.withUTF8 {
            try? $0._decomposed(type, as: Unicode.UTF8.self, into: buffer, nullTerminated: nullTerminated)
        }
    }

    #if canImport(Darwin) || FOUNDATION_FRAMEWORK
    /// Fills `buffer` with the null-terminated, HFS+-decomposed UTF-8 form of this string.
    ///
    /// - Returns: `true` on success, `false` when decomposition fails.
    fileprivate func _fileSystemRepresentation(into buffer: UnsafeMutableBufferPointer<CChar>) -> Bool {
        let result = buffer.withMemoryRebound(to: UInt8.self) { rebound in
            _decomposed(.hfsPlus, into: rebound, nullTerminated: true)
        }

        return result != nil
    }

    private var maxFileSystemRepresentationSize: Int {
        // The Darwin file system representation expands the UTF-8 contents to decomposed UTF-8 contents (only decomposing specific scalars)
        // For any given scalar that we decompose, we will increase its UTF-8 length by at most a factor of 3 during decomposition
        // (ex. U+0390 expands from 2 to 6 UTF-8 code-units, U+1D160 expands from 4 to 12 UTF-8 code-units)
        // Therefore in the worst case scenario, the result will be the UTF-8 length multiplied by a factor of 3 plus an additional byte for the null byte
        self.utf8.count * 3 + 1
    }
    #endif

    /// Calls `block` with a null-terminated C string holding this string's file system representation.
    ///
    /// On Darwin / the Foundation framework build the representation is the HFS+-decomposed form and
    /// `block` receives `nil` when it cannot be produced. On Windows a leading `/` before a drive
    /// letter is dropped and `/` is replaced with `\`. Elsewhere the string's own C string is used.
    ///
    /// - Parameter block: The operation to perform; the pointer is only valid for the duration of the call.
    /// - Returns: Whatever `block` returns.
    package func withFileSystemRepresentation<R>(_ block: (UnsafePointer<CChar>?) throws -> R) rethrows -> R {
        #if canImport(Darwin) || FOUNDATION_FRAMEWORK
        try withUnsafeTemporaryAllocation(of: CChar.self, capacity: maxFileSystemRepresentationSize) { buffer in
            guard _fileSystemRepresentation(into: buffer) else {
                return try block(nil)
            }
            return try block(buffer.baseAddress!)
        }
        #else
        #if os(Windows)
        var iter = self.utf8.makeIterator()
        let bLeadingSlash = if iter.next() == ._slash, iter.next()?.isLetter ?? false, iter.next() == ._colon { true } else { false }

        // Strip the leading `/` on a RFC8089 path (`/[drive-letter]:/...` ). A
        // leading slash indicates a rooted path on the drive for the current
        // working directory.
        return try Substring(self.utf8.dropFirst(bLeadingSlash ? 1 : 0)).replacing(._slash, with: ._backslash).withCString {
            try block($0)
        }
        #else
        return try withCString {
            try block($0)
        }
        #endif
        #endif
    }

    /// Calls `block` with a mutable, null-terminated C string holding this string's file system representation.
    ///
    /// The buffer is a temporary copy; mutations made by `block` are not written back to the string.
    /// On Darwin / the Foundation framework build `block` receives `nil` when the representation
    /// cannot be produced.
    ///
    /// - Parameter block: The operation to perform; the pointer is only valid for the duration of the call.
    /// - Returns: Whatever `block` returns.
    package func withMutableFileSystemRepresentation<R>(_ block: (UnsafeMutablePointer<CChar>?) throws -> R) rethrows -> R {
        #if canImport(Darwin) || FOUNDATION_FRAMEWORK
        try withUnsafeTemporaryAllocation(of: CChar.self, capacity: maxFileSystemRepresentationSize) { buffer in
            guard _fileSystemRepresentation(into: buffer) else {
                return try block(nil)
            }
            return try block(buffer.baseAddress!)
        }
        #else
        #if os(Windows)
        var iter = self.utf8.makeIterator()
        let bLeadingSlash = if iter.next() == ._slash, iter.next()?.isLetter ?? false, iter.next() == ._colon { true } else { false }

        var mut: String = Substring(self.utf8[self.utf8.index(self.utf8.startIndex, offsetBy: bLeadingSlash ? 1 : 0)...])
            .replacing(._slash, with: ._backslash)
        #else
        var mut: String = self
        #endif
        return try mut.withUTF8 { utf8Buffer in
            // Leave space for a null byte at the end
            try withUnsafeTemporaryAllocation(of: CChar.self, capacity: utf8Buffer.count + 1) { temporaryBuffer in
                try utf8Buffer.withMemoryRebound(to: CChar.self) { utf8CCharBuffer in
                    let nullByteIndex = temporaryBuffer.initialize(fromContentsOf: utf8CCharBuffer)
                    // Null-terminate
                    temporaryBuffer.initializeElement(at: nullByteIndex, to: CChar(0))
                    // Deinitialize on every exit path, including when `block` throws.
                    defer {
                        temporaryBuffer.prefix(through: nullByteIndex).deinitialize()
                    }
                    return try block(temporaryBuffer.baseAddress)
                }
            }
        }
        #endif
    }
}

extension UnsafeBufferPointer {
    /// Failures that can occur while writing decomposed output.
    private enum DecompositionError : Error {
        case insufficientSpace
        case illegalScalar
        case decodingError
    }

    /// Rebinds both this buffer and `buffer` to the code unit types `_decomposed` expects, then forwards to it.
    ///
    /// NOTE(review): the generic clause was reconstructed from usage (`T.CodeUnit`, and a destination
    /// that is rebound to `UTF8.CodeUnit`); confirm the exact constraints.
    fileprivate func _decomposedRebinding<T: UnicodeCodec, InputElement>(_ type: String._NormalizationType, as codec: T.Type, into buffer: UnsafeMutableBufferPointer<InputElement>, nullTerminated: Bool = false) throws -> Int {
        try self.withMemoryRebound(to: T.CodeUnit.self) { reboundSelf in
            try buffer.withMemoryRebound(to: Unicode.UTF8.CodeUnit.self) { reboundBuffer in
                try reboundSelf._decomposed(type, as: codec, into: reboundBuffer, nullTerminated: nullTerminated)
            }
        }
    }

    /// Decodes this buffer with codec `T` and writes UTF-8 output into `buffer`, decomposing scalars
    /// in the set selected by `type` (framework builds only) and ordering buffered non-ASCII scalars
    /// by canonical combining class.
    ///
    /// - Parameters:
    ///   - type: Selects which scalars are decomposed.
    ///   - codec: The encoding of this buffer's code units.
    ///   - buffer: The destination for the UTF-8 output.
    ///   - nullTerminated: When `true`, a terminating zero byte is written and counted, and the last
    ///     byte of `buffer` is forced to zero on every exit path.
    /// - Returns: The number of bytes written. When the input contains a null scalar, this is the
    ///   index of the last null seen plus one.
    /// - Throws: `DecompositionError.insufficientSpace` when `buffer` is too small,
    ///   `.decodingError` when the input is not valid for `T`, and `.illegalScalar` for a non-null
    ///   scalar following a null or a scalar that cannot be encoded.
    fileprivate func _decomposed<T: UnicodeCodec>(_ type: String._NormalizationType, as codec: T.Type, into buffer: UnsafeMutableBufferPointer<UInt8>, nullTerminated: Bool = false) throws -> Int where Element == T.CodeUnit {
        let scalarSet = BuiltInUnicodeScalarSet(type: type.setType)
        var bufferIdx = 0
        let bufferLength = buffer.count
        var sortBuffer: [UnicodeScalar] = []
        var seenNullIdx: Int? = nil
        var decoder = T()
        var iterator = self.makeIterator()

        guard !buffer.isEmpty else {
            if !nullTerminated && iterator.next() == nil {
                // No bytes to write, so an empty buffer is OK
                return 0
            } else {
                throw DecompositionError.insufficientSpace
            }
        }

        defer {
            if nullTerminated {
                // Ensure buffer is always null-terminated even on failure to prevent buffer over-reads
                // At this point, the buffer is known to be non-empty, so it must have space for at least a null terminating byte (even if it overwrites the final output byte in the buffer)
                buffer[buffer.count - 1] = 0
            }
        }

        // Appends a run of bytes to the remaining portion of the output buffer.
        func appendOutput(_ values: some Collection<UInt8>) throws {
            let bufferPortion = UnsafeMutableBufferPointer(start: buffer.baseAddress!.advanced(by: bufferIdx), count: bufferLength - bufferIdx)
            guard bufferPortion.count >= values.count else {
                throw DecompositionError.insufficientSpace
            }
            bufferIdx += bufferPortion.initialize(fromContentsOf: values)
        }

        // Appends a single byte to the output buffer.
        func appendOutput(_ value: UInt8) throws {
            guard bufferIdx < bufferLength else {
                throw DecompositionError.insufficientSpace
            }
            buffer.initializeElement(at: bufferIdx, to: value)
            bufferIdx += 1
        }

        // Returns the UTF-8 encoding of `scalar`.
        func encodedScalar(_ scalar: UnicodeScalar) throws -> some Collection<UInt8> {
            guard let encoded = UTF8.encode(scalar) else {
                throw DecompositionError.illegalScalar
            }
            return encoded
        }

        // Sorts the pending scalars by combining class, writes them out, and empties the pending list.
        func fillFromSortBuffer() throws {
            guard !sortBuffer.isEmpty else { return }
            sortBuffer.sort {
                $0.properties.canonicalCombiningClass.rawValue < $1.properties.canonicalCombiningClass.rawValue
            }
            for scalar in sortBuffer {
                try appendOutput(encodedScalar(scalar))
            }
            sortBuffer.removeAll(keepingCapacity: true)
        }

        decodingLoop: while bufferIdx < bufferLength {
            var scalar: UnicodeScalar
            switch decoder.decode(&iterator) {
            // We've finished the input, return the index
            case .emptyInput: break decodingLoop
            case .error: throw DecompositionError.decodingError
            case .scalarValue(let v): scalar = v
            }

            if scalar.value == 0 {
                // Null bytes within the string are fine as long as they are at the end
                seenNullIdx = bufferIdx
            } else if seenNullIdx != nil {
                // File system representations are c-strings that do not support embedded null bytes
                throw DecompositionError.illegalScalar
            }

            let isASCII = scalar.isASCII
            // An ASCII scalar or a not-reordered scalar ends the current run of pending scalars.
            if isASCII || scalar.properties.canonicalCombiningClass == .notReordered {
                try fillFromSortBuffer()
            }

            if isASCII {
                try appendOutput(UInt8(scalar.value))
            } else {
                #if FOUNDATION_FRAMEWORK
                // Only decompose scalars present in the declared set
                if scalarSet.contains(scalar) {
                    sortBuffer.append(contentsOf: String(scalar)._nfd)
                } else {
                    // Even if a scalar isn't decomposed, it may still need to be re-ordered
                    sortBuffer.append(scalar)
                }
                #else
                // TODO: Implement Unicode decomposition in swift-foundation
                sortBuffer.append(scalar)
                #endif
            }
        }
        try fillFromSortBuffer()

        // Leftover input means the loop stopped because the output buffer filled up.
        if iterator.next() != nil {
            throw DecompositionError.insufficientSpace
        } else {
            if let seenNullIdx {
                return seenNullIdx + 1
            }
            if nullTerminated {
                try appendOutput(0)
            }
            return bufferIdx
        }
    }
}

#if FOUNDATION_FRAMEWORK
@objc
extension NSString {
    /// Fills `pointer` with the null-terminated, HFS+-decomposed file system representation of this string.
    ///
    /// - Parameters:
    ///   - pointer: The start of the destination buffer.
    ///   - maxLength: The capacity of the destination buffer, including room for the null terminator.
    /// - Returns: `true` on success; `false` when the buffer is empty or too small, or the contents
    ///   cannot be represented (for example, a non-null character follows an embedded null).
    @objc
    func __swiftFillFileSystemRepresentation(pointer: UnsafeMutablePointer<CChar>, maxLength: Int) -> Bool {
        autoreleasepool {
            let buffer = UnsafeMutableBufferPointer(start: pointer, count: maxLength)

            guard !buffer.isEmpty else {
                // No space for a null terminating byte, so it's not worth even trying to read the string contents
                return false
            }

            // See if we have a quick-access buffer we can just convert directly
            if let fastCharacters = self._fastCharacterContents() {
                // If we have quick access to UTF-16 contents, decompose from UTF-16
                let charsBuffer = UnsafeBufferPointer(start: fastCharacters, count: self.length)
                return (try? charsBuffer._decomposedRebinding(.hfsPlus, as: Unicode.UTF16.self, into: buffer, nullTerminated: true)) != nil
            } else if self.fastestEncoding == NSASCIIStringEncoding, let fastUTF8 = self._fastCStringContents(false) {
                // If we have quick access to ASCII contents, no need to decompose
                let utf8Buffer = UnsafeBufferPointer(start: fastUTF8, count: self.length)

                defer {
                    // Ensure buffer is always null-terminated even on failure to prevent buffer over-reads
                    // At this point, the buffer is known to be non-empty, so it must have space for at least a null terminating byte (even if it overwrites the final output byte in the buffer)
                    buffer[buffer.count - 1] = 0
                }

                // We only allow embedded nulls if there are no non-null characters following the first null character
                if let embeddedNullIdx = utf8Buffer.firstIndex(of: 0) {
                    if !utf8Buffer[embeddedNullIdx...].allSatisfy({ $0 == 0 }) {
                        return false
                    }
                }

                var (leftoverIterator, next) = buffer.initialize(from: utf8Buffer)
                // Fail when the source did not fit or no slot is left for the terminator.
                guard leftoverIterator.next() == nil && next < buffer.endIndex else {
                    return false
                }
                buffer[next] = 0
                return true
            } else {
                // Otherwise, bridge to a String which will create a UTF-8 buffer
                return String(self)._fileSystemRepresentation(into: buffer)
            }
        }
    }
}
#endif