mirror of
https://github.com/apple/swift-foundation.git
synced 2025-05-22 05:19:57 +08:00
There appears to be some kind of race or memory smash in ICU after these, and we need more time to investigate the full root cause.
345 lines
14 KiB
Swift
345 lines
14 KiB
Swift
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2023 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
|
|
|
|
#if FOUNDATION_FRAMEWORK
|
|
@_spi(_Unicode) import Swift
|
|
internal import Foundation_Private.NSString
|
|
#endif
|
|
|
|
#if canImport(Darwin)
|
|
import Darwin
|
|
#endif
|
|
|
|
#if os(Windows)
|
|
import WinSDK
|
|
|
|
extension String {
    /// Invokes `body` with a pointer to the fully-qualified, UTF-16 encoded form of this path.
    ///
    /// A leading separator in front of a drive letter (`/C:/...`, RFC 8089 style) is dropped
    /// before the path is normalized with `GetFullPathNameW`.
    ///
    /// - Parameter body: Closure that receives the normalized path. The pointer refers to a
    ///   temporary allocation and must not be used after `body` returns.
    /// - Returns: Whatever `body` returns.
    /// - Throws: An error built by `CocoaError.errorWithFilePath` when the string is empty or
    ///   the path cannot be normalized; any error thrown by `body` is propagated.
    package func withNTPathRepresentation<Result>(_ body: (UnsafePointer<WCHAR>) throws -> Result) throws -> Result {
        guard !isEmpty else {
            throw CocoaError.errorWithFilePath(.fileReadInvalidFileName, "")
        }

        var iter = self.utf8.makeIterator()
        let bLeadingSlash = if [._slash, ._backslash].contains(iter.next()), iter.next()?.isLetter ?? false, iter.next() == ._colon { true } else { false }

        // Strip the leading `/` on a RFC8089 path (`/[drive-letter]:/...` ). A
        // leading slash indicates a rooted path on the drive for the current
        // working directory.
        return try Substring(self.utf8.dropFirst(bLeadingSlash ? 1 : 0)).withCString(encodedAs: UTF16.self) { pwszPath in
            // 1. Normalize the path first.
            // The sizing call returns the required capacity in WCHARs (including the
            // terminating null), or 0 on failure.
            let dwLength: DWORD = GetFullPathNameW(pwszPath, 0, nil, nil)
            guard dwLength > 0 else {
                throw CocoaError.errorWithFilePath(self, win32: GetLastError(), reading: true)
            }

            return try withUnsafeTemporaryAllocation(of: WCHAR.self, capacity: Int(dwLength)) { pwszFullPath in
                let dwWritten = GetFullPathNameW(pwszPath, DWORD(pwszFullPath.count), pwszFullPath.baseAddress, nil)
                guard dwWritten > 0 else {
                    throw CocoaError.errorWithFilePath(self, win32: GetLastError(), reading: true)
                }
                // On success the result excludes the terminating null, so it is strictly
                // smaller than the capacity. A larger value means the buffer was too small
                // (for example the working directory changed between the two calls) and
                // nothing usable was written; never hand that buffer to `body`.
                guard dwWritten < DWORD(pwszFullPath.count) else {
                    throw CocoaError.errorWithFilePath(self, win32: DWORD(ERROR_INSUFFICIENT_BUFFER), reading: true)
                }

                // 2. Perform the operation on the normalized path.
                return try body(pwszFullPath.baseAddress!)
            }
        }
    }
}
|
|
#endif
|
|
|
|
extension String {
|
|
/// Returns a new string built from this string's Unicode scalars after
/// `_trimmingCharacters` has removed the scalars whose `isWhitespace` property is set.
///
/// NOTE(review): presumably trims from both ends; confirm against `_trimmingCharacters`.
package func _trimmingWhitespace() -> String {
    let trimmedScalars = unicodeScalars._trimmingCharacters { scalar in
        scalar.properties.isWhitespace
    }
    return String(trimmedScalars)
}
|
|
|
|
/// Creates a string by transcoding a buffer of UTF-16 code units to UTF-8.
///
/// - Parameter input: The UTF-16 code units to convert.
/// - Returns: `nil` when transcoding reports an error or `String._tryFromUTF8`
///   rejects the transcoded bytes.
package init?(_utf16 input: UnsafeBufferPointer<UInt16>) {
    // Reserve input.count * 3 bytes since one UTF-16 code unit may require up to three UTF-8 code units when transcoded
    let utf8Capacity = input.count * 3
    let decoded = withUnsafeTemporaryAllocation(of: UTF8.CodeUnit.self, capacity: utf8Capacity) { scratch -> String? in
        var written = 0
        let hadError = transcode(input.makeIterator(), from: UTF16.self, to: UTF8.self, stoppingOnError: true) { byte in
            scratch[written] = byte
            written += 1
        }

        if hadError {
            return nil
        }

        // Only the first `written` bytes of the scratch space hold transcoded output
        let utf8Bytes = UnsafeBufferPointer(rebasing: scratch[..<written])
        return String._tryFromUTF8(utf8Bytes)
    }

    guard let decoded else {
        return nil
    }
    self = decoded
}
|
|
|
|
/// Creates a string from the first `count` UTF-16 code units of a mutable buffer.
///
/// - Parameters:
///   - input: Buffer holding UTF-16 code units.
///   - count: Number of leading code units of `input` to convert.
/// - Returns: `nil` when the buffer-based `init(_utf16:)` fails for that prefix.
package init?(_utf16 input: UnsafeMutableBufferPointer<UInt16>, count: Int) {
    let leadingUnits = UnsafeBufferPointer(rebasing: input[..<count])
    guard let decoded = String(_utf16: leadingUnits) else {
        return nil
    }
    self = decoded
}
|
|
|
|
/// Creates a string from `count` UTF-16 code units starting at `input`.
///
/// - Parameters:
///   - input: Pointer to the first UTF-16 code unit.
///   - count: Number of code units to read from `input`.
/// - Returns: `nil` when the buffer-based `init(_utf16:)` fails for those code units.
package init?(_utf16 input: UnsafePointer<UInt16>, count: Int) {
    let units = UnsafeBufferPointer(start: input, count: count)
    guard let decoded = String(_utf16: units) else {
        return nil
    }
    self = decoded
}
|
|
|
|
/// The decomposition flavors understood by the `_decomposed` helpers.
enum _NormalizationType {
    case canonical
    case hfsPlus

    /// The built-in scalar set naming the scalars that are decomposed for this flavor.
    fileprivate var setType: BuiltInUnicodeScalarSet.SetType {
        switch self {
        case .canonical:
            return .canonicalDecomposable
        case .hfsPlus:
            return .hfsPlusDecomposable
        }
    }
}
|
|
|
|
/// Writes the decomposed UTF-8 form of this string into `buffer`.
///
/// - Parameters:
///   - type: The decomposition flavor to apply.
///   - buffer: Destination for the decomposed UTF-8 bytes.
///   - nullTerminated: Forwarded to the buffer-level `_decomposed`; requests a trailing null byte.
/// - Returns: The value returned by the buffer-level `_decomposed`, or `nil` if it threw.
private func _decomposed(_ type: String._NormalizationType, into buffer: UnsafeMutableBufferPointer<UInt8>, nullTerminated: Bool = false) -> Int? {
    // `withUTF8` is a mutating method, so it is invoked on a local copy
    var contiguousCopy = self
    return contiguousCopy.withUTF8 { utf8Bytes in
        try? utf8Bytes._decomposed(type, as: Unicode.UTF8.self, into: buffer, nullTerminated: nullTerminated)
    }
}
|
|
|
|
#if canImport(Darwin) || FOUNDATION_FRAMEWORK
|
|
/// Fills `buffer` with the null-terminated, HFS+-decomposed UTF-8 form of this string.
///
/// - Parameter buffer: Destination C-string storage.
/// - Returns: `true` if decomposition succeeded, `false` if it failed (for example
///   because `buffer` was too small).
fileprivate func _fileSystemRepresentation(into buffer: UnsafeMutableBufferPointer<CChar>) -> Bool {
    // The decomposition routine writes UInt8, so view the CChar storage as bytes
    buffer.withMemoryRebound(to: UInt8.self) { bytes in
        _decomposed(.hfsPlus, into: bytes, nullTerminated: true) != nil
    }
}
|
|
|
|
/// Upper bound, in bytes, on the size of this string's file system representation,
/// including the terminating null byte.
private var maxFileSystemRepresentationSize: Int {
    // The Darwin file system representation expands the UTF-8 contents to decomposed UTF-8 contents (only decomposing specific scalars)
    // For any given scalar that we decompose, we will increase its UTF-8 length by at most a factor of 3 during decomposition
    // (ex. U+0390 expands from 2 to 6 UTF-8 code-units, U+1D160 expands from 4 to 12 UTF-8 code-units)
    let worstCaseGrowthFactor = 3
    // One additional byte for the null terminator
    let nullTerminatorSize = 1
    return utf8.count * worstCaseGrowthFactor + nullTerminatorSize
}
|
|
#endif
|
|
|
|
/// Calls `block` with a C-string form of this path suitable for file system APIs.
///
/// - On Darwin / `FOUNDATION_FRAMEWORK` the string is decomposed into a temporary buffer;
///   `block` receives `nil` if that conversion fails.
/// - On Windows a leading `/` in front of a drive letter is dropped and `/` is replaced by `\`.
/// - Elsewhere the string's own C-string representation is passed through.
///
/// - Parameter block: Closure receiving the C string; the pointer is only valid during the call.
/// - Returns: Whatever `block` returns.
package func withFileSystemRepresentation<R>(_ block: (UnsafePointer<CChar>?) throws -> R) rethrows -> R {
#if canImport(Darwin) || FOUNDATION_FRAMEWORK
    return try withUnsafeTemporaryAllocation(of: CChar.self, capacity: maxFileSystemRepresentationSize) { scratch in
        // A failed conversion is reported to the caller as a nil pointer
        if _fileSystemRepresentation(into: scratch) {
            return try block(scratch.baseAddress!)
        } else {
            return try block(nil)
        }
    }
#else
#if os(Windows)
    var utf8Iterator = self.utf8.makeIterator()
    let hasSlashBeforeDrive: Bool
    if utf8Iterator.next() == ._slash, utf8Iterator.next()?.isLetter ?? false, utf8Iterator.next() == ._colon {
        hasSlashBeforeDrive = true
    } else {
        hasSlashBeforeDrive = false
    }
    // Strip the leading `/` on a RFC8089 path (`/[drive-letter]:/...` ). A
    // leading slash indicates a rooted path on the drive for the current
    // working directory.
    let nativePath: String = Substring(self.utf8.dropFirst(hasSlashBeforeDrive ? 1 : 0)).replacing(._slash, with: ._backslash)
    return try nativePath.withCString { cString in
        try block(cString)
    }
#else
    return try withCString { cString in
        try block(cString)
    }
#endif
#endif
}
|
|
|
|
/// Calls `block` with a mutable, null-terminated C-string copy of this path.
///
/// - On Darwin / `FOUNDATION_FRAMEWORK` the string is decomposed into a temporary buffer;
///   `block` receives `nil` if that conversion fails.
/// - On Windows a leading `/` in front of a drive letter is dropped and `/` is replaced by `\`
///   before the UTF-8 bytes are copied.
/// - Elsewhere the UTF-8 bytes are copied unchanged.
///
/// - Parameter block: Closure receiving the temporary buffer; the pointer is only valid during the call.
/// - Returns: Whatever `block` returns.
package func withMutableFileSystemRepresentation<R>(_ block: (UnsafeMutablePointer<CChar>?) throws -> R) rethrows -> R {
#if canImport(Darwin) || FOUNDATION_FRAMEWORK
    return try withUnsafeTemporaryAllocation(of: CChar.self, capacity: maxFileSystemRepresentationSize) { scratch in
        // A failed conversion is reported to the caller as a nil pointer
        if _fileSystemRepresentation(into: scratch) {
            return try block(scratch.baseAddress!)
        } else {
            return try block(nil)
        }
    }
#else
#if os(Windows)
    var utf8Iterator = self.utf8.makeIterator()
    let hasSlashBeforeDrive: Bool
    if utf8Iterator.next() == ._slash, utf8Iterator.next()?.isLetter ?? false, utf8Iterator.next() == ._colon {
        hasSlashBeforeDrive = true
    } else {
        hasSlashBeforeDrive = false
    }
    // Skip the separator that precedes the drive letter, then switch to backslashes
    let skippedUnits = hasSlashBeforeDrive ? 1 : 0
    var pathCopy: String =
        Substring(self.utf8[self.utf8.index(self.utf8.startIndex, offsetBy: skippedUnits)...])
            .replacing(._slash, with: ._backslash)
#else
    var pathCopy: String = self
#endif

    return try pathCopy.withUTF8 { utf8Bytes in
        // Leave space for a null byte at the end
        try withUnsafeTemporaryAllocation(of: CChar.self, capacity: utf8Bytes.count + 1) { scratch in
            try utf8Bytes.withMemoryRebound(to: CChar.self) { utf8CChars in
                // Copy the path, then null-terminate right behind the copied bytes
                let terminatorIndex = scratch.initialize(fromContentsOf: utf8CChars)
                scratch.initializeElement(at: terminatorIndex, to: CChar(0))
                let blockResult = try block(scratch.baseAddress)
                scratch.prefix(through: terminatorIndex).deinitialize()
                return blockResult
            }
        }
    }
#endif
}
|
|
|
|
}
|
|
|
|
extension UnsafeBufferPointer {
|
|
/// Failures reported while decomposing a buffer of code units into UTF-8.
private enum DecompositionError: Error {
    /// The destination buffer cannot hold the complete output.
    case insufficientSpace
    /// A scalar could not be encoded as UTF-8, or a non-null scalar followed a null.
    case illegalScalar
    /// The input code units could not be decoded.
    case decodingError
}
|
|
|
|
/// Rebinds this buffer to `codec`'s code unit type and `buffer` to UTF-8 code units,
/// then forwards to `_decomposed`.
///
/// - Parameters:
///   - type: The decomposition flavor to apply.
///   - codec: The encoding of the code units stored in this buffer.
///   - buffer: Destination storage, reinterpreted as UTF-8 code units.
///   - nullTerminated: Forwarded to `_decomposed`; requests a trailing null byte.
/// - Returns: The value returned by `_decomposed`.
/// - Throws: Whatever `_decomposed` throws.
fileprivate func _decomposedRebinding<T: UnicodeCodec, InputElement>(_ type: String._NormalizationType, as codec: T.Type, into buffer: UnsafeMutableBufferPointer<InputElement>, nullTerminated: Bool = false) throws -> Int {
    try self.withMemoryRebound(to: T.CodeUnit.self) { codeUnits in
        try buffer.withMemoryRebound(to: Unicode.UTF8.CodeUnit.self) { utf8Output in
            try codeUnits._decomposed(type, as: codec, into: utf8Output, nullTerminated: nullTerminated)
        }
    }
}
|
|
|
|
/// Decodes this buffer as `codec`-encoded text and writes it to `buffer` as UTF-8,
/// ordering runs of non-ASCII scalars by canonical combining class.
///
/// ASCII scalars are copied straight to the output. Every other scalar is first collected
/// in a pending list (decomposed beforehand when built with `FOUNDATION_FRAMEWORK` and the
/// scalar is in the set selected by `type`). The pending list is sorted by canonical
/// combining class and written out when the next ASCII or `.notReordered` scalar arrives,
/// or when the input ends. Null scalars are accepted only as a trailing run.
///
/// - Parameters:
///   - type: Selects the set of scalars that are decomposed.
///   - codec: The encoding of the code units stored in this buffer.
///   - buffer: Destination for the UTF-8 output.
///   - nullTerminated: When `true` and the input did not end in a null itself, a null byte
///     is appended to the output.
/// - Returns: The number of bytes written to `buffer`, counting the trailing null byte
///   when one was written.
/// - Throws: `DecompositionError.insufficientSpace` if `buffer` is too small,
///   `DecompositionError.decodingError` if the input cannot be decoded, and
///   `DecompositionError.illegalScalar` if a scalar cannot be encoded or a non-null scalar
///   follows a null.
fileprivate func _decomposed<T: UnicodeCodec>(_ type: String._NormalizationType, as codec: T.Type, into buffer: UnsafeMutableBufferPointer<UInt8>, nullTerminated: Bool = false) throws -> Int where Element == T.CodeUnit {
    let scalarSet = BuiltInUnicodeScalarSet(type: type.setType)
    var bufferIdx = 0
    let bufferLength = buffer.count
    var sortBuffer: [UnicodeScalar] = []
    var seenNullIdx: Int? = nil
    var decoder = T()
    var iterator = self.makeIterator()

    // Appends a run of bytes to the output, failing if it does not fit in the remaining space
    func appendOutput(_ values: some Collection<UInt8>) throws {
        let bufferPortion = UnsafeMutableBufferPointer(start: buffer.baseAddress!.advanced(by: bufferIdx), count: bufferLength - bufferIdx)
        guard bufferPortion.count >= values.count else {
            throw DecompositionError.insufficientSpace
        }
        bufferIdx += bufferPortion.initialize(fromContentsOf: values)
    }

    // Appends a single byte to the output, failing if the buffer is already full
    func appendOutput(_ value: UInt8) throws {
        guard bufferIdx < bufferLength else {
            throw DecompositionError.insufficientSpace
        }
        buffer.initializeElement(at: bufferIdx, to: value)
        bufferIdx += 1
    }

    // Encodes a scalar as UTF-8
    func encodedScalar(_ scalar: UnicodeScalar) throws -> some Collection<UInt8> {
        guard let encoded = UTF8.encode(scalar) else {
            throw DecompositionError.illegalScalar
        }
        return encoded
    }

    // Sorts the pending scalars by canonical combining class and writes them to the output
    func fillFromSortBuffer() throws {
        guard !sortBuffer.isEmpty else { return }
        sortBuffer.sort {
            $0.properties.canonicalCombiningClass.rawValue < $1.properties.canonicalCombiningClass.rawValue
        }
        for scalar in sortBuffer {
            try appendOutput(encodedScalar(scalar))
        }
        sortBuffer.removeAll(keepingCapacity: true)
    }

    decodingLoop: while bufferIdx < bufferLength {
        var scalar: UnicodeScalar
        switch decoder.decode(&iterator) {
        // We've finished the input, return the index
        case .emptyInput: break decodingLoop
        case .error: throw DecompositionError.decodingError
        case .scalarValue(let v): scalar = v
        }

        if scalar.value == 0 {
            // Null bytes within the string are fine as long as they are at the end.
            // Write out any pending scalars first so that the recorded index is the
            // position the null byte really occupies in the output; otherwise the
            // returned length would not account for the pending bytes.
            try fillFromSortBuffer()
            seenNullIdx = bufferIdx
        } else if seenNullIdx != nil {
            // File system representations are c-strings that do not support embedded null bytes
            throw DecompositionError.illegalScalar
        }

        let isASCII = scalar.isASCII
        if isASCII || scalar.properties.canonicalCombiningClass == .notReordered {
            // A starter ends the current run of reorderable scalars
            try fillFromSortBuffer()
        }

        if isASCII {
            try appendOutput(UInt8(scalar.value))
        } else {
#if FOUNDATION_FRAMEWORK
            // Only decompose scalars present in the declared set
            if scalarSet.contains(scalar) {
                sortBuffer.append(contentsOf: String(scalar)._nfd)
            } else {
                // Even if a scalar isn't decomposed, it may still need to be re-ordered
                sortBuffer.append(scalar)
            }
#else
            // TODO: Implement Unicode decomposition in swift-foundation
            sortBuffer.append(scalar)
#endif
        }
    }
    try fillFromSortBuffer()

    if iterator.next() != nil {
        // The loop stopped because the output filled up while input was still left
        throw DecompositionError.insufficientSpace
    } else {
        if let seenNullIdx {
            // The input supplied its own terminator; report the length through that null byte
            return seenNullIdx + 1
        }
        if nullTerminated {
            try appendOutput(0)
        }
        return bufferIdx
    }
}
|
|
}
|
|
|
|
#if FOUNDATION_FRAMEWORK
|
|
@objc
extension NSString {
    /// Fills `pointer` with the null-terminated file system representation of the receiver.
    ///
    /// - Parameters:
    ///   - pointer: Destination C-string storage.
    ///   - maxLength: Number of `CChar`s available at `pointer`, including room for the
    ///     terminating null byte.
    /// - Returns: `true` if the representation and its terminator were written; `false` if
    ///   conversion failed, the output did not fit, or a null character is followed by a
    ///   non-null character.
    @objc
    func __swiftFillFileSystemRepresentation(pointer: UnsafeMutablePointer<CChar>, maxLength: Int) -> Bool {
        return autoreleasepool {
            let destination = UnsafeMutableBufferPointer(start: pointer, count: maxLength)

            // See if we have a quick-access buffer we can just convert directly
            if let utf16Storage = self._fastCharacterContents() {
                // If we have quick access to UTF-16 contents, decompose from UTF-16
                let utf16Units = UnsafeBufferPointer(start: utf16Storage, count: self.length)
                let written = try? utf16Units._decomposedRebinding(.hfsPlus, as: Unicode.UTF16.self, into: destination, nullTerminated: true)
                return written != nil
            }

            if self.fastestEncoding == NSASCIIStringEncoding, let asciiStorage = self._fastCStringContents(false) {
                // If we have quick access to ASCII contents, no need to decompose
                let asciiBytes = UnsafeBufferPointer(start: asciiStorage, count: self.length)

                // We only allow embedded nulls if there are no non-null characters following the first null character
                if let firstNullIdx = asciiBytes.firstIndex(of: 0), asciiBytes[firstNullIdx...].contains(where: { $0 != 0 }) {
                    return false
                }

                // Copy the bytes; fail if some did not fit or there is no room left for the terminator
                var (unwritten, terminatorIdx) = destination.initialize(from: asciiBytes)
                guard unwritten.next() == nil, terminatorIdx < destination.endIndex else {
                    return false
                }
                destination[terminatorIdx] = 0
                return true
            }

            // Otherwise, bridge to a String which will create a UTF-8 buffer
            return String(self)._fileSystemRepresentation(into: destination)
        }
    }
}
|
|
#endif
|