rdar://106965817 (FoundationEssentials: Internal character set)

Add a Swift-native character set that mirrors CF/NSCharacterSet to support FoundationEssentials. We will use this type to back `CharacterSet`, which is currently NS-bridged, when the time comes to re-core it.

Currently it's only used for String capitalization.
This commit is contained in:
I-Ting Tina Liu 2023-03-20 15:38:55 -07:00
parent 59b92b4a3b
commit cde55ba8e1
5 changed files with 14880 additions and 0 deletions

View File

@ -0,0 +1,92 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift Collections open source project
//
// Copyright (c) 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//
@_implementationOnly import _CShims
/// A Swift-native counterpart of the predefined, bitmap-backed `CFCharacterSet`s.
///
/// Each value names one built-in set of Unicode scalars whose bitmap data we
/// own (read from `__CFUniCharBitmapDataArray`). `whitespace`,
/// `whitespaceAndNewline`, and `newline` are not included since they are not
/// stored with bitmaps. Only the subset of predefined `CFCharacterSet`s that is
/// currently in use is modeled here.
internal struct BuiltInUnicodeScalarSet {
    /// The predefined sets that can be queried.
    enum SetType {
        case lowercaseLetter
        case uppercaseLetter
        case canonicalDecomposable

        // The cases below are internal.
        case caseIgnorable
        case graphemeExtend
    }

    /// The predefined set this value represents.
    var charset: SetType

    /// Number of bits to shift a 16-bit in-plane offset right by to obtain the
    /// index of the bitmap byte that holds its bit (8 bits per byte).
    let bitShiftForByte: UInt16 = 3

    /// Despite its name, this is used as a bit mask: the low three bits of the
    /// in-plane offset select the bit within the bitmap byte.
    let bitShiftForMask: UInt16 = 7

    static let uppercaseLetter = Self(type: .uppercaseLetter)
    static let lowercaseLetter = Self(type: .lowercaseLetter)
    static let caseIgnorable = Self(type: .caseIgnorable)
    static let graphemeExtend = Self(type: .graphemeExtend)
    static let canonicalDecomposable = Self(type: .canonicalDecomposable)

    /// Creates a set representing the given predefined `type`.
    init(type: SetType) {
        charset = type
    }

    /// Index of this set's entry in `__CFUniCharBitmapDataArray`.
    ///
    /// Equivalent to
    /// `__CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID())`.
    private var _bitmapTableIndex: Int {
        switch charset {
        case .lowercaseLetter: return 2
        case .uppercaseLetter: return 3
        case .canonicalDecomposable: return 5
        case .caseIgnorable: return 20
        case .graphemeExtend: return 21
        }
    }

    /// Returns whether `scalar` is a member of this set.
    ///
    /// Counterpart of `CFUniCharIsMemberOf`.
    func contains(_ scalar: Unicode.Scalar) -> Bool {
        // Bits 16...23 of the scalar value select the plane.
        let plane = Int((scalar.value >> 16) & 0xFF)
        return _isMemberOfBitmap(scalar, _bitmapPtrForPlane(plane))
    }

    /// Returns the bitmap for `plane` of this set, or `nil` when the set's
    /// table index is out of range or the set has no entry for that plane.
    ///
    /// Counterpart of `CFUniCharGetBitmapPtrForPlane`.
    func _bitmapPtrForPlane(_ plane: Int) -> UnsafePointer<UInt8>? {
        let tableIndex = _bitmapTableIndex
        guard tableIndex < __CFUniCharNumberOfBitmaps else {
            return nil
        }

        // The C array is viewed as a contiguous run of `__CFUniCharBitmapData`
        // so that it can be indexed by `tableIndex`.
        let entry = withUnsafePointer(to: __CFUniCharBitmapDataArray) { arrayPtr in
            arrayPtr.withMemoryRebound(to: __CFUniCharBitmapData.self, capacity: Int(__CFUniCharNumberOfBitmaps)) { entries in
                entries[tableIndex]
            }
        }

        guard plane < entry._numPlanes else {
            return nil
        }
        return entry._planes[plane]
    }

    /// Returns whether the bit for `scalar` is set in `bitmap`; a `nil` bitmap
    /// contains nothing.
    ///
    /// Counterpart of `CFUniCharIsMemberOfBitmap`.
    func _isMemberOfBitmap(_ scalar: Unicode.Scalar, _ bitmap: UnsafePointer<UInt8>?) -> Bool {
        guard let bitmap else { return false }

        // Only the low 16 bits index into a plane's bitmap; the truncation is
        // intentional.
        let offsetInPlane = UInt16(truncatingIfNeeded: scalar.value)
        let byte = bitmap[Int(offsetInPlane >> bitShiftForByte)]
        let bit = UInt8(1) << (offsetInPlane & bitShiftForMask)
        return (byte & bit) != 0
    }
}

View File

@ -0,0 +1,17 @@
/*
CFUniCharBitmapData.h
Copyright (c) 1999-2021, Apple Inc. and the Swift project authors. All rights reserved.
This file is generated. Don't touch this file directly.
*/
#ifndef _cfunichar_bitmap_data_h
#define _cfunichar_bitmap_data_h
#include "_CStdlib.h"
/* Bitmap table entry for one built-in character set. */
typedef struct {
/* Number of entries in `_planes`; the Swift caller treats any plane index at or beyond this as having no bitmap. */
uint32_t _numPlanes;
/* Array of per-plane bitmap pointers, indexed by plane number. */
const uint8_t **_planes;
} __CFUniCharBitmapData;
#endif /* _cfunichar_bitmap_data_h */

File diff suppressed because it is too large Load Diff

View File

@ -130,5 +130,9 @@
#include <uchar.h>
#endif
#if __has_include(<stdint.h>)
#include <stdint.h>
#endif
#endif // FOUNDATION_CSTDLIB

View File

@ -0,0 +1,61 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#if canImport(TestSupport)
import TestSupport
#endif
#if FOUNDATION_FRAMEWORK
@testable import Foundation
#else
@testable import FoundationEssentials
#endif // FOUNDATION_FRAMEWORK
/// Membership checks for the bitmap-backed `BuiltInUnicodeScalarSet` values.
final class BuiltInUnicodeScalarSetTest: XCTestCase {
    func testMembership() {
        /// Asserts that `set.contains(scalar)` equals `expected`, attributing
        /// any failure to the calling line.
        func check(
            _ scalar: Unicode.Scalar,
            in set: BuiltInUnicodeScalarSet,
            isMember expected: Bool,
            file: StaticString = #file,
            line: UInt = #line
        ) {
            XCTAssertEqual(set.contains(scalar), expected, file: file, line: line)
        }

        // Lowercase letters
        check("a", in: .lowercaseLetter, isMember: true)
        check("ô", in: .lowercaseLetter, isMember: true)
        check("\u{01FB}", in: .lowercaseLetter, isMember: true)
        check("\u{1FF7}", in: .lowercaseLetter, isMember: true)
        check("\u{1D467}", in: .lowercaseLetter, isMember: true)
        check("A", in: .lowercaseLetter, isMember: false)

        // Uppercase letters
        check("A", in: .uppercaseLetter, isMember: true)
        check("À", in: .uppercaseLetter, isMember: true)
        check("\u{01CF}", in: .uppercaseLetter, isMember: true)
        check("\u{1E5C}", in: .uppercaseLetter, isMember: true)
        check("\u{1D4A9}", in: .uppercaseLetter, isMember: true)
        check("a", in: .uppercaseLetter, isMember: false)

        // Case-ignorable scalars
        check("'", in: .caseIgnorable, isMember: true)
        check("ʻ", in: .caseIgnorable, isMember: true)
        check("\u{00B4}", in: .caseIgnorable, isMember: true) // ACUTE ACCENT
        check("\u{10792}", in: .caseIgnorable, isMember: true) // MODIFIER LETTER SMALL CAPITAL G
        check("\u{E0020}", in: .caseIgnorable, isMember: true)
        check("0", in: .caseIgnorable, isMember: false)

        // Grapheme-extending scalars
        check("\u{0300}", in: .graphemeExtend, isMember: true)
        check("\u{0610}", in: .graphemeExtend, isMember: true)
        check("\u{302A}", in: .graphemeExtend, isMember: true) // IDEOGRAPHIC LEVEL TONE MARK
        check("\u{1D17B}", in: .graphemeExtend, isMember: true) // MUSICAL SYMBOL COMBINING ACCENT
        check("\u{E0020}", in: .graphemeExtend, isMember: true) // TAG SPACE
        check("A", in: .graphemeExtend, isMember: false)
        check("~", in: .graphemeExtend, isMember: false)
    }
}