David Smith b0b89ccac1
Allocationless constant String -> NSString bridging via a new tagged pointer type (#1232)
* Revert "Revert "rdar://142693100 (Allocationless constant String -> NSString bridging via a new tagged pointer type) (#2798)" (#2843)"

This reverts commit c378439322f5b960ecb972b27ab1419a622ad6b2.

* Adopt the new entry point for bridging the new tagged pointers

* All remaining callsites passed null-terminated strings, so just eliminate the isTerminated bit and simplify further. Also gets us up to 13 bits of length

* Address review comments
2025-03-28 10:19:09 -07:00

288 lines
11 KiB
Swift

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2025 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#if FOUNDATION_FRAMEWORK
@_exported import Foundation // Clang module
@_spi(Foundation) import Swift
internal import CoreFoundation_Private.CFString
internal import ObjectiveC_Private.objc_internal
internal import CoreFoundation_Private.ForFoundationOnly
//===----------------------------------------------------------------------===//
// New Strings
//===----------------------------------------------------------------------===//
//
// Conversion from NSString to Swift's native representation
//
@available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
extension String {
    /// Creates a Swift string from an `NSString`.
    ///
    /// This is a convenience spelling that forwards to
    /// `String._unconditionallyBridgeFromObjectiveC(_:)`.
    ///
    /// - Parameter cocoaString: The Objective-C string to convert.
    public init(_ cocoaString: NSString) {
        let bridged: String = Self._unconditionallyBridgeFromObjectiveC(cocoaString)
        self = bridged
    }
}
@available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
extension String : _ObjectiveCBridgeable {
/// Bridges this string to an `NSString`.
///
/// - Returns: The object produced by `_bridgeToObjectiveCImpl()`,
///   reinterpreted as an `NSString` via `unsafeBitCast`.
@_semantics("convertToObjectiveC")
public func _bridgeToObjectiveC() -> NSString {
// This method should not do anything extra except calling into the
// implementation inside core. (These two entry points should be
// equivalent.)
return unsafeBitCast(_bridgeToObjectiveCImpl(), to: NSString.self)
}
/// Bridges `x` to a `String` and stores it in `result`.
///
/// - Parameters:
///   - x: The Objective-C string to bridge.
///   - result: Overwritten with the bridged string.
public static func _forceBridgeFromObjectiveC(_ x: NSString, result: inout String?) {
result = String._unconditionallyBridgeFromObjectiveC(x)
}
/// Bridges `x` to a `String`, stores it in `result`, and reports whether
/// `result` is non-nil afterwards.
///
/// The work is delegated to `_forceBridgeFromObjectiveC(_:result:)`, which
/// always assigns a non-optional `String`, so `result` is non-nil on return.
///
/// - Parameters:
///   - x: The Objective-C string to bridge.
///   - result: Overwritten with the bridged string.
/// - Returns: `true` if `result` holds a value after bridging.
public static func _conditionallyBridgeFromObjectiveC(_ x: NSString, result: inout String?) -> Bool {
self._forceBridgeFromObjectiveC(x, result: &result)
return result != nil
}
/// Converts an optional `NSString` into a Swift `String`.
///
/// The strategies are tried in this order:
/// 1. `nil` maps to the empty string.
/// 2. On targets other than i386, arm and arm64_32: tagged-pointer strings
///    are handled according to their tag.
/// 3. Strings for which `__CFStringIsCF` returns `true`: zero-length maps to
///    the empty string, constant strings go through
///    `String(_immortalCocoaString:count:encoding:)`, and mutable strings are
///    eagerly copied when that copy succeeds.
/// 4. All other strings: `String(_nativeStorage:)` is tried first, and a zero
///    `length` maps to the empty string.
/// 5. Anything that falls through is handed to `String(_cocoaString:)`.
///
/// - Parameter source: The Objective-C string to convert, or `nil`.
/// - Returns: The bridged Swift string.
@_effects(readonly)
public static func _unconditionallyBridgeFromObjectiveC(_ source: NSString?) -> String {
// `nil` has historically been used as a stand-in for an empty
// string; map it to an empty string.
guard let source = source else {
return ""
}
#if !(arch(i386) || arch(arm) || arch(arm64_32))
// Capacity requested for the String built from an OBJC_TAG_NSString tagged pointer.
var SMALL_STRING_CAPACITY:Int { 15 }
if OBJC_HAVE_TAGGED_POINTERS == 1 && _objc_isTaggedPointer(unsafeBitCast(source, to: UnsafeRawPointer.self)) {
let tag = _objc_getTaggedPointerTag(unsafeBitCast(source, to: UnsafeRawPointer.self))
if tag == OBJC_TAG_NSString {
// The closure's return value becomes the initialized byte count, so
// `_NSTaggedPointerStringGetBytes` presumably returns the number of
// bytes it wrote into the buffer — TODO confirm.
return String(unsafeUninitializedCapacity: SMALL_STRING_CAPACITY) {
_NSTaggedPointerStringGetBytes(source, $0.baseAddress!)
}
} else if tag == OBJC_TAG_NSAtom {
// `len` is filled in by the call below and used as the element count of
// the buffer wrapped around the returned contents pointer.
var len = UInt16(0)
let contentsPtr = _CFIndirectTaggedPointerStringGetContents(source, &len)
let contents = UnsafeBufferPointer(start: contentsPtr, count: Int(len))
// Will only fail if contents aren't valid UTF8/ASCII
if let result = _SwiftCreateImmortalString_ForFoundation(buffer: contents, isASCII: true) {
return result
}
// Since our contents are invalid, force a real copy of the string and bridge that instead. This should basically never be hit in practice
return source.mutableCopy() as! String
} else if tag.rawValue == 22 /* OBJC_TAG_Foundation_1 */ {
// NOTE(review): both force-unwraps trap if `utf8String` is nil or
// `String(utf8String:)` fails; presumably strings carrying this tag
// always hold valid, null-terminated UTF-8 — confirm.
let cStr = source.utf8String!
return String.init(utf8String: cStr)!
}
}
#endif
// Out-parameters populated by `__CFStringIsCF` below.
var ascii = false
var len = 0
var mutable = false
var constant = false
if __CFStringIsCF(unsafeBitCast(source, to: CFString.self), &mutable, &len, &ascii, &constant) {
if len == 0 {
return ""
}
if constant {
if ascii {
// We would like to use _SwiftCreateImmortalString_ForFoundation here, but we can't because we need to maintain the invariant
// (constantString as String as NSString) === constantString
// and using _SwiftCreateImmortalString_ForFoundation would make an indirect tagged string instead on the way back
return String(_immortalCocoaString: source, count: len, encoding: Unicode.ASCII.self)
} else {
return String(_immortalCocoaString: source, count: len, encoding: Unicode.UTF16.self)
}
}
/*
If `source` is a mutable string, we should eagerly bridge.
Lazy bridging will still wastefully copy it to immutable first.
*/
if mutable {
// Copies `source` into a new String of `capacity` bytes using `encoding`.
// The inner closure reports 0 initialized bytes unless the conversion
// covered all `len` units, so an incomplete conversion produces an empty
// string, which is mapped to `nil` here (`len` is known to be non-zero at
// this point).
let eagerBridge = { (source: NSString, encoding: CFStringBuiltInEncodings, capacity: Int) -> String? in
let result = String(unsafeUninitializedCapacity: capacity) { buffer in
var usedLen = 0
let convertedCount = _CFNonObjCStringGetBytes(
unsafeBitCast(source, to: CFString.self),
CFRangeMake(0, len),
encoding.rawValue,
0,
false,
buffer.baseAddress.unsafelyUnwrapped,
capacity,
&usedLen
)
if convertedCount != len {
return 0
}
return usedLen
}
return result.isEmpty ? nil : result
}
if ascii {
// For ASCII contents the requested byte capacity is `len` itself.
if let result = eagerBridge(source, CFStringBuiltInEncodings.ASCII, len) {
return result
}
} else {
// Otherwise the capacity is the UTF-8 byte length reported by `source`.
if let result = eagerBridge(source, CFStringBuiltInEncodings.UTF8, source.lengthOfBytes(using: String.Encoding.utf8.rawValue)) {
return result
}
}
// If the eager copy returned nil, fall through to `String(_cocoaString:)` below.
}
} else { //not an __NSCFString
// If `source` is a Swift-native string ("rebridging"), we should return it as-is.
if let result = String(_nativeStorage: source) {
return result
}
len = source.length
if len == 0 {
return ""
}
}
/*
Future
If `source` is an immutable NSCFString, we should get what String
needs with as few calls as possible. A unified
"get isASCII, providesFastUTF8, length" method? Is this the same
"prepare for bridging" concept?
Complication: if we can prove an NSCFString has no associated objects
we may want to eagerly bridge it. Checking that is rdar://50255938,
but even once we have that, when to eagerly bridge vs lazily bridge
is not necessarily an obvious set of tradeoffs.
Existing stdlib entry points:
`String(unsafeUninitializedCapacity:, initializingWith:)`
`String(_cocoaString:)`
`String?(_nativeStorage:)`
`String(_immortalCocoaString:, count:, encoding:)`
Should the stdlib expose
`String(_preparedCocoaString:,isASCII:,providesFastUTF8:,length:)` for
this code to use? Is there a better signature? (This one is just the
one from the underlying _StringGuts() initializer)
If we do all this, aside from major speedups, we can delete everything
in _bridgeCocoaString() in the stdlib, keeping all the smarts about
how bridging works in one place (here).
*/
return String(_cocoaString: source)
}
}
@available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
extension Substring : _ObjectiveCBridgeable {
    /// Bridges this substring to an `NSString` by first converting it to a
    /// `String` and then bridging that string.
    @_semantics("convertToObjectiveC")
    public func _bridgeToObjectiveC() -> NSString {
        let wholeString = String(self)
        return wholeString._bridgeToObjectiveC()
    }

    /// Bridges `x` to a `String` and stores a substring spanning all of it in
    /// `result`.
    ///
    /// - Parameters:
    ///   - x: The Objective-C string to bridge.
    ///   - result: Overwritten with the bridged substring.
    public static func _forceBridgeFromObjectiveC(_ x: NSString, result: inout Substring?) {
        result = String(x)[...]
    }

    /// Bridges `x` via `_forceBridgeFromObjectiveC(_:result:)` and reports
    /// whether `result` holds a value afterwards.
    ///
    /// - Parameters:
    ///   - x: The Objective-C string to bridge.
    ///   - result: Overwritten with the bridged substring.
    /// - Returns: `true` if `result` is non-nil after bridging.
    public static func _conditionallyBridgeFromObjectiveC(_ x: NSString, result: inout Substring?) -> Bool {
        Substring._forceBridgeFromObjectiveC(x, result: &result)
        let didBridge = result != nil
        return didBridge
    }

    /// Bridges an optional `NSString` through `String` and returns a substring
    /// spanning the whole bridged string.
    ///
    /// - Parameter source: The Objective-C string to convert, or `nil`.
    /// - Returns: A substring covering the entire bridged string.
    @_effects(readonly)
    public static func _unconditionallyBridgeFromObjectiveC(_ source: NSString?) -> Substring {
        let bridged = String._unconditionallyBridgeFromObjectiveC(source)
        return bridged[...]
    }
}
// Declares `String`'s conformance to `CVarArg`. The extension body is empty, so
// the protocol's requirements are not implemented here — presumably they are
// satisfied by a default implementation defined elsewhere; TODO confirm.
@available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
extension String: CVarArg {}
/*
This is on NSObject so that the stdlib can call it in StringBridge.swift
without having to synthesize a receiver (e.g. look up a class or allocate).
TODO: In the future (once the Foundation overlay can know about SmallString),
we should move the caller of this method up into the overlay and avoid this
indirection.
*/
private extension NSObject {
    /// Asks CoreFoundation for a tagged-pointer string holding the given bytes.
    ///
    /// - Parameters:
    ///   - bytes: Pointer to the bytes to store.
    ///   - count: Number of bytes to read from `bytes`.
    /// - Returns: The created string, or `nil` when
    ///   `_CFStringCreateTaggedPointerString` returns nothing.
    // The ObjC selector has to start with "new" to get ARC to not autorelease
    @_effects(releasenone)
    @objc(newTaggedNSStringWithASCIIBytes_:length_:)
    func createTaggedString(bytes: UnsafePointer<UInt8>,
                            count: Int) -> AnyObject? {
        guard let created = _CFStringCreateTaggedPointerString(bytes, count) else {
            return nil
        }
        // The explicit cast chain is deliberate: just "as AnyObject" inserts unwanted bridging.
        return created.takeRetainedValue() as NSString as NSString?
    }
}
extension Substring {
    /// Splits this substring at every character matched by `characterSet`.
    ///
    /// Scanning stops as soon as no further (non-empty) match is found; the
    /// remaining tail is always appended, so the result contains at least one
    /// element.
    ///
    /// - Parameter characterSet: The set of separator characters.
    /// - Returns: The pieces between separators, in order, as `String`s.
    func _components(separatedBy characterSet: CharacterSet) -> [String] {
        var pieces: [String] = []
        var cursor = startIndex

        // Keep consuming separators while there is text left and a non-empty match exists.
        while cursor < endIndex,
              let separator = self[cursor...]._rangeOfCharacter(from: characterSet, options: []),
              !separator.isEmpty {
            pieces.append(String(self[cursor ..< separator.lowerBound]))
            cursor = separator.upperBound
        }

        // Whatever follows the last separator (possibly nothing) is the final piece.
        pieces.append(String(self[cursor ..< endIndex]))
        return pieces
    }
}
extension BidirectionalCollection where Element == Unicode.Scalar, Index == String.Index {
    /// Returns the subsequence left after removing scalars contained in `set`
    /// from both ends of the collection.
    ///
    /// - Parameter set: The scalars to strip from the start and the end.
    /// - Returns: The trimmed subsequence; empty when every scalar is in `set`.
    func _trimmingCharacters(in set: CharacterSet) -> SubSequence {
        // Locate the first scalar that survives trimming; if there is none, everything is trimmed.
        guard let firstKept = firstIndex(where: { !set.contains($0) }) else {
            return self[endIndex...]
        }

        // If the first surviving scalar is also the last element, there is no trailing run to strip.
        let lastPosition = index(before: endIndex)
        if firstKept >= lastPosition {
            return self[firstKept ..< endIndex]
        }

        // Walk backwards over the trailing run. No bounds check is needed: the scalar at
        // `firstKept` is not in `set`, so the loop stops there at the latest.
        var lastKept = lastPosition
        while set.contains(self[lastKept]) {
            formIndex(before: &lastKept)
        }
        return self[firstKept ... lastKept]
    }
}
#endif