mirror of
https://github.com/apple/swift-foundation.git
synced 2025-05-23 14:00:14 +08:00
On Windows, there is a built-in maximum path limitation of 260 characters under most conditions. This can be extended to 32767 characters under either of the following two conditions: (1) adding the longPathAware attribute to the executable's manifest AND enabling the LongPathsEnabled system-wide registry key or group policy; or (2) ensuring fully qualified paths passed to Win32 APIs are prefixed with \\?\. Unfortunately, the former is not realistic for the Swift ecosystem, since it requires developers to have awareness of this specific Windows limitation, AND set longPathAware in their apps' manifest AND expect end users of those apps to change their system configuration. Instead, this patch transparently prefixes all eligible paths in calls to Win32 APIs with the \\?\ prefix to allow them to work with paths longer than 260 characters without requiring the caller of Foundation to manually prefix the paths. See https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation for more info.
417 lines
18 KiB
Swift
417 lines
18 KiB
Swift
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2023 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#if FOUNDATION_FRAMEWORK
|
|
@_spi(_Unicode) import Swift
|
|
internal import Foundation_Private.NSString
|
|
#endif
|
|
|
|
#if canImport(Darwin)
|
|
import Darwin
|
|
#endif
|
|
|
|
#if os(Windows)
|
|
import WinSDK
|
|
|
|
extension String {
    /// Invokes `body` with a resolved and potentially `\\?\`-prefixed version of the pointee,
    /// to ensure long paths greater than MAX_PATH (260) characters are handled correctly.
    ///
    /// A single leading `/` or `\` that precedes a drive specifier (`/C:...`) is dropped before the
    /// path is handed to the Win32 APIs.
    ///
    /// - parameter relative: When `true` and `PathIsRelativeW` reports the path as relative, `body` receives the original path without transforming it through GetFullPathNameW + PathAllocCanonicalize.
    /// - parameter body: Closure invoked with a null-terminated UTF-16 path. The pointer must not be used after `body` returns.
    /// - returns: The value returned by `body`.
    /// - throws: A `CocoaError` when the string is empty or when normalizing/canonicalizing the path fails, as well as any error thrown by `body`.
    /// - seealso: https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation
    package func withNTPathRepresentation<Result>(relative: Bool = false, _ body: (UnsafePointer<WCHAR>) throws -> Result) throws -> Result {
        guard !isEmpty else {
            throw CocoaError.errorWithFilePath(.fileReadInvalidFileName, "")
        }

        var iter = self.utf8.makeIterator()
        let bLeadingSlash = if [._slash, ._backslash].contains(iter.next()), iter.next()?.isLetter ?? false, iter.next() == ._colon { true } else { false }

        // Strip the leading `/` on a RFC8089 path (`/[drive-letter]:/...` ).  A
        // leading slash indicates a rooted path on the drive for the current
        // working directory.
        return try Substring(self.utf8.dropFirst(bLeadingSlash ? 1 : 0)).withCString(encodedAs: UTF16.self) { pwszPath in
            if relative && PathIsRelativeW(pwszPath) {
                return try body(pwszPath)
            }

            // 1. Normalize the path first.
            // Contrary to the documentation, this works on long paths independently
            // of the registry or process setting to enable long paths (but it will also
            // not add the \\?\ prefix required by other functions under these conditions).
            let dwLength: DWORD = GetFullPathNameW(pwszPath, 0, nil, nil)

            // A return value of zero means the sizing call itself failed. Report that as an
            // error here: forming `1..<dwLength` below with `dwLength == 0` would trap at
            // runtime instead of surfacing the Win32 error to the caller.
            guard dwLength > 0 else {
                throw CocoaError.errorWithFilePath(self, win32: GetLastError(), reading: true)
            }

            return try withUnsafeTemporaryAllocation(of: WCHAR.self, capacity: Int(dwLength)) { pwszFullPath in
                // On success the second call returns the number of characters written, excluding
                // the terminator, so it must be strictly smaller than the size reported above.
                guard (1..<dwLength).contains(GetFullPathNameW(pwszPath, DWORD(pwszFullPath.count), pwszFullPath.baseAddress, nil)) else {
                    throw CocoaError.errorWithFilePath(self, win32: GetLastError(), reading: true)
                }

                // 1.5 Leave \\.\ prefixed paths alone since device paths are already an exact
                // representation and canonicalization will mangle these.
                if let base = pwszFullPath.baseAddress,
                    base[0] == UInt16(UInt8._backslash),
                    base[1] == UInt16(UInt8._backslash),
                    base[2] == UInt16(UInt8._period),
                    base[3] == UInt16(UInt8._backslash) {
                    return try body(base)
                }

                // 2. Canonicalize the path.
                // This will add the \\?\ prefix if needed based on the path's length.
                var pwszCanonicalPath: LPWSTR?
                let flags: ULONG = PATHCCH_ALLOW_LONG_PATHS
                let result = PathAllocCanonicalize(pwszFullPath.baseAddress, flags, &pwszCanonicalPath)
                if let pwszCanonicalPath {
                    // The canonical path is released with LocalFree on every exit from this scope.
                    defer { LocalFree(pwszCanonicalPath) }
                    if result == S_OK {
                        // 3. Perform the operation on the normalized path.
                        return try body(pwszCanonicalPath)
                    }
                }
                throw CocoaError.errorWithFilePath(self, win32: WIN32_FROM_HRESULT(result), reading: true)
            }
        }
    }
}
|
|
#endif
|
|
|
|
extension String {
|
|
/// Returns a new string built from this string's unicode scalars after passing them through
/// `_trimmingCharacters` with a predicate matching scalars whose `isWhitespace` property is set.
/// (Presumably the helper trims from both ends; confirm against its definition.)
///
/// An empty receiver short-circuits to an empty result.
package func _trimmingWhitespace() -> String {
    guard !self.isEmpty else {
        return ""
    }

    let trimmedScalars = unicodeScalars._trimmingCharacters { scalar in
        scalar.properties.isWhitespace
    }
    return String(trimmedScalars)
}
|
|
|
|
/// Creates a string by transcoding the UTF-16 code units in `input` to UTF-8.
///
/// Fails (returns `nil`) when transcoding reports an error, or when `String._tryFromUTF8`
/// rejects the transcoded bytes.
package init?(_utf16 input: UnsafeBufferPointer<UInt16>) {
    // Reserve input.count * 3 bytes since a single UTF-16 code unit may need up to three UTF-8 code units once transcoded
    let decoded = withUnsafeTemporaryAllocation(of: UTF8.CodeUnit.self, capacity: input.count * 3) { utf8Storage -> String? in
        var written = 0
        let hadError = transcode(input.makeIterator(), from: UTF16.self, to: UTF8.self, stoppingOnError: true) { byte in
            utf8Storage[written] = byte
            written += 1
        }

        if hadError {
            return nil
        }

        let encoded = UnsafeBufferPointer(rebasing: utf8Storage[..<written])
        return String._tryFromUTF8(encoded)
    }

    guard let decoded else {
        return nil
    }
    self = decoded
}
|
|
|
|
/// Creates a string from the first `count` UTF-16 code units of the mutable buffer `input`.
///
/// Forwards to `init?(_utf16:)` on an immutable view of that prefix and fails when it fails.
package init?(_utf16 input: UnsafeMutableBufferPointer<UInt16>, count: Int) {
    let prefix = UnsafeBufferPointer(rebasing: input[..<count])
    self.init(_utf16: prefix)
}
|
|
|
|
/// Creates a string from `count` UTF-16 code units starting at `input`.
///
/// Forwards to `init?(_utf16:)` on a buffer spanning those code units and fails when it fails.
package init?(_utf16 input: UnsafePointer<UInt16>, count: Int) {
    let codeUnits = UnsafeBufferPointer(start: input, count: count)
    self.init(_utf16: codeUnits)
}
|
|
|
|
/// Selects which built-in scalar set drives decomposition.
enum _NormalizationType {
    case canonical, hfsPlus

    /// The `BuiltInUnicodeScalarSet` set type corresponding to this normalization type.
    fileprivate var setType: BuiltInUnicodeScalarSet.SetType {
        switch self {
        case .canonical:
            return .canonicalDecomposable
        case .hfsPlus:
            return .hfsPlusDecomposable
        }
    }
}
|
|
|
|
/// Decomposes this string's UTF-8 contents into `buffer` using the given normalization type.
///
/// - Returns: The value produced by the buffer-level `_decomposed`, or `nil` if that call throws.
private func _decomposed(_ type: String._NormalizationType, into buffer: UnsafeMutableBufferPointer<UInt8>, nullTerminated: Bool = false) -> Int? {
    // `withUTF8` is a mutating method, so it is invoked on a local copy.
    var scratch = self
    let written: Int? = scratch.withUTF8 { utf8Bytes in
        try? utf8Bytes._decomposed(type, as: Unicode.UTF8.self, into: buffer, nullTerminated: nullTerminated)
    }
    return written
}
|
|
|
|
#if canImport(Darwin) || FOUNDATION_FRAMEWORK
|
|
/// Fills `buffer` with the null-terminated, `.hfsPlus`-decomposed form of this string.
///
/// - Returns: `true` when the decomposition produced a result, `false` when it failed.
fileprivate func _fileSystemRepresentation(into buffer: UnsafeMutableBufferPointer<CChar>) -> Bool {
    let written: Int? = buffer.withMemoryRebound(to: UInt8.self) { bytes in
        _decomposed(.hfsPlus, into: bytes, nullTerminated: true)
    }
    guard written != nil else {
        return false
    }
    return true
}
|
|
|
|
/// Upper bound, in bytes, on the size of this string's Darwin file system representation.
private var maxFileSystemRepresentationSize: Int {
    // The Darwin file system representation is the UTF-8 contents with specific scalars decomposed.
    // Decomposing a scalar grows its UTF-8 length by at most a factor of 3
    // (ex. U+0390 expands from 2 to 6 UTF-8 code-units, U+1D160 expands from 4 to 12 UTF-8 code-units),
    // so the worst case is three times the UTF-8 length, plus one byte for the null terminator.
    let utf8Length = utf8.count
    return utf8Length * 3 + 1
}
|
|
#endif
|
|
|
|
/// Calls `block` with a null-terminated C string holding this string's file system representation.
///
/// - On Darwin / `FOUNDATION_FRAMEWORK` builds the representation is produced by
///   `_fileSystemRepresentation(into:)`; `block` receives `nil` when that fails.
/// - On Windows a leading `/` in front of a drive specifier is dropped and `/` is replaced with `\`.
/// - Elsewhere the string's own C string is passed through.
///
/// - Returns: The value returned by `block`.
package func withFileSystemRepresentation<R>(_ block: (UnsafePointer<CChar>?) throws -> R) rethrows -> R {
    #if canImport(Darwin) || FOUNDATION_FRAMEWORK
    return try withUnsafeTemporaryAllocation(of: CChar.self, capacity: maxFileSystemRepresentationSize) { scratch in
        if _fileSystemRepresentation(into: scratch) {
            return try block(scratch.baseAddress!)
        }
        return try block(nil)
    }
    #elseif os(Windows)
    var utf8Iterator = self.utf8.makeIterator()
    let hasLeadingSlash = utf8Iterator.next() == ._slash
        && (utf8Iterator.next()?.isLetter ?? false)
        && utf8Iterator.next() == ._colon
    // Strip the leading `/` on a RFC8089 path (`/[drive-letter]:/...` ).  A
    // leading slash indicates a rooted path on the drive for the current
    // working directory.
    let unrooted = Substring(self.utf8.dropFirst(hasLeadingSlash ? 1 : 0))
    return try unrooted.replacing(._slash, with: ._backslash).withCString { cString in
        try block(cString)
    }
    #else
    return try withCString { cString in
        try block(cString)
    }
    #endif
}
|
|
|
|
/// Calls `block` with a mutable, null-terminated C string holding this string's file system
/// representation. The storage is temporary and must not be used after `block` returns.
///
/// - On Darwin / `FOUNDATION_FRAMEWORK` builds the representation is produced by
///   `_fileSystemRepresentation(into:)`; `block` receives `nil` when that fails.
/// - On Windows a leading `/` in front of a drive specifier is dropped and `/` is replaced with `\`
///   before the UTF-8 bytes are copied.
/// - Elsewhere the string's UTF-8 bytes are copied unchanged.
///
/// - Returns: The value returned by `block`.
package func withMutableFileSystemRepresentation<R>(_ block: (UnsafeMutablePointer<CChar>?) throws -> R) rethrows -> R {
    #if canImport(Darwin) || FOUNDATION_FRAMEWORK
    return try withUnsafeTemporaryAllocation(of: CChar.self, capacity: maxFileSystemRepresentationSize) { scratch in
        if _fileSystemRepresentation(into: scratch) {
            return try block(scratch.baseAddress!)
        }
        return try block(nil)
    }
    #else
    #if os(Windows)
    var utf8Iterator = self.utf8.makeIterator()
    let hasLeadingSlash = utf8Iterator.next() == ._slash
        && (utf8Iterator.next()?.isLetter ?? false)
        && utf8Iterator.next() == ._colon
    var path: String =
        Substring(self.utf8.dropFirst(hasLeadingSlash ? 1 : 0))
            .replacing(._slash, with: ._backslash)
    #else
    var path: String = self
    #endif

    return try path.withUTF8 { source in
        // One extra element of capacity holds the terminating null byte
        let capacity = source.count + 1
        return try withUnsafeTemporaryAllocation(of: CChar.self, capacity: capacity) { scratch in
            try source.withMemoryRebound(to: CChar.self) { sourceChars in
                let terminatorIndex = scratch.initialize(fromContentsOf: sourceChars)
                // Null-terminate the copy
                scratch.initializeElement(at: terminatorIndex, to: CChar(0))
                let outcome = try block(scratch.baseAddress)
                scratch.prefix(through: terminatorIndex).deinitialize()
                return outcome
            }
        }
    }
    #endif
}
|
|
|
|
}
|
|
|
|
extension UnsafeBufferPointer {
|
|
/// Failure reasons for decomposition: the output did not fit (`insufficientSpace`), a scalar was
/// rejected (`illegalScalar`), or the input could not be decoded (`decodingError`).
private enum DecompositionError: Error {
    case insufficientSpace, illegalScalar, decodingError
}
|
|
|
|
/// Rebinds `self` to `T.CodeUnit` and `buffer` to UTF-8 code units, then forwards to `_decomposed`.
///
/// - Returns: The value returned by `_decomposed`.
/// - Throws: Whatever `_decomposed` throws.
fileprivate func _decomposedRebinding<T: UnicodeCodec, InputElement>(_ type: String._NormalizationType, as codec: T.Type, into buffer: UnsafeMutableBufferPointer<InputElement>, nullTerminated: Bool = false) throws -> Int {
    let written = try withMemoryRebound(to: T.CodeUnit.self) { codeUnits in
        return try buffer.withMemoryRebound(to: Unicode.UTF8.CodeUnit.self) { utf8Output in
            return try codeUnits._decomposed(type, as: codec, into: utf8Output, nullTerminated: nullTerminated)
        }
    }
    return written
}
|
|
|
|
/// Decodes `self` with `codec` and writes the result into `buffer` as UTF-8.
///
/// ASCII scalars are written straight through. Non-ASCII scalars are collected in a reorder
/// buffer (decomposed first when built with `FOUNDATION_FRAMEWORK` and the scalar is in the set
/// selected by `type`), sorted by canonical combining class, and flushed to `buffer` when the next
/// ASCII scalar or scalar with combining class `.notReordered` is decoded, or at end of input.
///
/// Null scalars are accepted only at the end of the input; a non-null scalar following a null
/// is rejected.
///
/// - Parameters:
///   - type: Normalization type selecting which scalars get decomposed.
///   - codec: The encoding of the receiver's code units.
///   - buffer: Destination for the UTF-8 output.
///   - nullTerminated: When `true`, a null byte is appended after the output (unless the input
///     already ended in a null), and `buffer` is left null-terminated even when this method throws.
/// - Returns: The number of bytes written to `buffer`, counting the terminating null byte when
///   one was written.
/// - Throws: `DecompositionError.insufficientSpace` when `buffer` is too small,
///   `.decodingError` when the input is not valid for `codec`, and `.illegalScalar` for an
///   embedded null or a scalar that cannot be encoded.
///
/// NOTE(review): the trailing `iterator.next()` check assumes the decoder holds no buffered
/// look-ahead when the loop stops on a full buffer — confirm for the codecs in use.
fileprivate func _decomposed<T: UnicodeCodec>(_ type: String._NormalizationType, as codec: T.Type, into buffer: UnsafeMutableBufferPointer<UInt8>, nullTerminated: Bool = false) throws -> Int where Element == T.CodeUnit {
    let scalarSet = BuiltInUnicodeScalarSet(type: type.setType)
    var bufferIdx = 0
    let bufferLength = buffer.count
    var sortBuffer: [UnicodeScalar] = []
    var seenNullIdx: Int? = nil
    var decoder = T()
    var iterator = self.makeIterator()

    guard !buffer.isEmpty else {
        if !nullTerminated && iterator.next() == nil {
            // No bytes to write, so an empty buffer is OK
            return 0
        } else {
            throw DecompositionError.insufficientSpace
        }
    }

    defer {
        if nullTerminated {
            // Ensure buffer is always null-terminated even on failure to prevent buffer over-reads
            // At this point, the buffer is known to be non-empty, so it must have space for at least a null terminating byte (even if it overwrites the final output byte in the buffer)
            if bufferIdx < bufferLength {
                // We still have space left in the buffer - if we haven't already null-terminated then add a null byte to the buffer
                // Since we have space, we only want to write the null byte when/where we have to since some clients provide buffer sizes that don't match the true buffer length
                if bufferIdx == buffer.startIndex || buffer[bufferIdx - 1] != 0 {
                    buffer[bufferIdx] = 0
                }
            } else {
                // The buffer is non-empty but we've completely filled it, overwrite the last written byte with a null byte to ensure null termination
                buffer[buffer.count - 1] = 0
            }
        }
    }

    // Appends a run of bytes at the current write position, failing if it does not fit.
    func appendOutput(_ values: some Collection<UInt8>) throws {
        let bufferPortion = UnsafeMutableBufferPointer(start: buffer.baseAddress!.advanced(by: bufferIdx), count: bufferLength - bufferIdx)
        guard bufferPortion.count >= values.count else {
            throw DecompositionError.insufficientSpace
        }
        bufferIdx += bufferPortion.initialize(fromContentsOf: values)
    }

    // Appends a single byte at the current write position, failing if the buffer is full.
    func appendOutput(_ value: UInt8) throws {
        guard bufferIdx < bufferLength else {
            throw DecompositionError.insufficientSpace
        }
        buffer.initializeElement(at: bufferIdx, to: value)
        bufferIdx += 1
    }

    // UTF-8 encodes one scalar, failing if the scalar cannot be encoded.
    func encodedScalar(_ scalar: UnicodeScalar) throws -> some Collection<UInt8> {
        guard let encoded = UTF8.encode(scalar) else {
            throw DecompositionError.illegalScalar
        }
        return encoded
    }

    // Sorts the pending scalars by canonical combining class and writes them out.
    func fillFromSortBuffer() throws {
        guard !sortBuffer.isEmpty else { return }
        sortBuffer.sort {
            $0.properties.canonicalCombiningClass.rawValue < $1.properties.canonicalCombiningClass.rawValue
        }
        for scalar in sortBuffer {
            try appendOutput(encodedScalar(scalar))
        }
        sortBuffer.removeAll(keepingCapacity: true)
    }

    decodingLoop: while bufferIdx < bufferLength {
        let scalar: UnicodeScalar
        switch decoder.decode(&iterator) {
        // We've finished the input, return the index
        case .emptyInput: break decodingLoop
        case .error: throw DecompositionError.decodingError
        case .scalarValue(let v): scalar = v
        }

        // Null bytes within the string are fine as long as they are at the end
        let isNull = scalar.value == 0
        if !isNull && seenNullIdx != nil {
            // File system representations are c-strings that do not support embedded null bytes
            throw DecompositionError.illegalScalar
        }

        let isASCII = scalar.isASCII
        if isASCII || scalar.properties.canonicalCombiningClass == .notReordered {
            try fillFromSortBuffer()
        }

        if isASCII {
            if isNull {
                // Record the null's position only after pending scalars have been flushed, so the
                // index refers to where the null byte actually lands in the output.
                seenNullIdx = bufferIdx
            }
            try appendOutput(UInt8(scalar.value))
        } else {
            #if FOUNDATION_FRAMEWORK
            // Only decompose scalars present in the declared set
            if scalarSet.contains(scalar) {
                sortBuffer.append(contentsOf: String(scalar)._nfd)
            } else {
                // Even if a scalar isn't decomposed, it may still need to be re-ordered
                sortBuffer.append(scalar)
            }
            #else
            // TODO: Implement Unicode decomposition in swift-foundation
            sortBuffer.append(scalar)
            #endif
        }
    }
    try fillFromSortBuffer()

    if iterator.next() != nil {
        // The loop stopped because the buffer filled up while input remained
        throw DecompositionError.insufficientSpace
    } else {
        if let seenNullIdx {
            // The input supplied its own terminator; report the length through that null byte
            return seenNullIdx + 1
        }
        if nullTerminated {
            try appendOutput(0)
        }
        return bufferIdx
    }
}
|
|
}
|
|
|
|
#if FOUNDATION_FRAMEWORK
|
|
@objc
extension NSString {
    /// Writes the receiver's null-terminated file system representation into the `maxLength`
    /// bytes starting at `pointer`.
    ///
    /// Three strategies are tried in order: decomposing directly from fast UTF-16 contents,
    /// copying fast ASCII contents verbatim, and finally bridging to `String`.
    /// On the failure paths handled in this method with a non-empty buffer, a null byte is
    /// written so the buffer remains a terminated C string.
    ///
    /// - Returns: `true` when the representation was written in full, `false` otherwise.
    @objc
    func __swiftFillFileSystemRepresentation(pointer: UnsafeMutablePointer<CChar>, maxLength: Int) -> Bool {
        autoreleasepool {
            let destination = UnsafeMutableBufferPointer(start: pointer, count: maxLength)

            // Without room for even a terminating null byte there is no point reading the contents
            if destination.isEmpty {
                return false
            }

            // Quick-access UTF-16 contents: decompose straight from UTF-16
            if let utf16Start = self._fastCharacterContents() {
                let utf16Units = UnsafeBufferPointer(start: utf16Start, count: self.length)
                let written = try? utf16Units._decomposedRebinding(.hfsPlus, as: Unicode.UTF16.self, into: destination, nullTerminated: true)
                return written != nil
            }

            // No quick-access ASCII contents either: bridge to a String, which creates a UTF-8 buffer
            guard self.fastestEncoding == NSASCIIStringEncoding, let asciiStart = self._fastCStringContents(false) else {
                return String(self)._fileSystemRepresentation(into: destination)
            }

            // Quick-access ASCII contents need no decomposition
            let asciiBytes = UnsafeBufferPointer(start: asciiStart, count: self.length)

            // Embedded nulls are tolerated only when nothing but nulls follows the first one
            if let firstNull = asciiBytes.firstIndex(of: 0),
               asciiBytes[firstNull...].contains(where: { $0 != 0 }) {
                // Keep the buffer null-terminated on failure to prevent over-reads - it is known to be non-empty here
                destination[0] = 0
                return false
            }

            var (uncopied, terminatorIndex) = destination.initialize(from: asciiBytes)
            let fitsWithTerminator = uncopied.next() == nil && terminatorIndex < destination.endIndex
            if !fitsWithTerminator {
                // Keep the buffer null-terminated on failure to prevent over-reads
                destination[destination.endIndex - 1] = 0
                return false
            }
            destination[terminatorIndex] = 0
            return true
        }
    }
}
|
|
#endif
|