mirror of
https://github.com/apple/swift-foundation.git
synced 2025-05-28 09:47:07 +08:00
rdar://106965817 (FoundationEssentials: Internal character set)
Add a Swift-native character set that mirrors CF/NSCharacterSet to support FoundationEssentials. We will use this type to back `CharacterSet`, which is currently NS-bridged, once the time comes to re-core it. Currently it's only used for String capitalization.
This commit is contained in:
parent
59b92b4a3b
commit
cde55ba8e1
92
Sources/FoundationEssentials/BuiltInUnicodeScalarSet.swift
Normal file
92
Sources/FoundationEssentials/BuiltInUnicodeScalarSet.swift
Normal file
@ -0,0 +1,92 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This source file is part of the Swift.org open source project
|
||||
//
|
||||
// Copyright (c) 2022 Apple Inc. and the Swift project authors
|
||||
// Licensed under Apache License v2.0 with Runtime Library Exception
|
||||
//
|
||||
// See https://swift.org/LICENSE.txt for license information
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@_implementationOnly import _CShims
|
||||
|
||||
// Native implementation of CFCharacterSet.
|
||||
// Represents the sets of Unicode scalars whose bitmap data we own.
|
||||
// `whitespace`, `whitespaceAndNewline`, and `newline` are not included since they're not stored as bitmaps.
|
||||
// For now, this only contains the subset of predefined CFCharacterSet sets that are currently in use.
|
||||
/// A predefined set of Unicode scalars whose membership is answered from the
/// bitmap tables exposed by `_CShims` (`__CFUniCharBitmapDataArray`).
///
/// Use one of the static instances (`.lowercaseLetter`, `.uppercaseLetter`, ...)
/// and ask it whether it `contains(_:)` a scalar.
internal struct BuiltInUnicodeScalarSet {
    /// The predefined sets this type can stand for.
    enum SetType {
        case lowercaseLetter
        case uppercaseLetter
        case canonicalDecomposable

        // Below are internal
        case caseIgnorable
        case graphemeExtend
    }

    // MARK: Shared instances

    static let uppercaseLetter = BuiltInUnicodeScalarSet(type: .uppercaseLetter)
    static let lowercaseLetter = BuiltInUnicodeScalarSet(type: .lowercaseLetter)
    static let caseIgnorable = BuiltInUnicodeScalarSet(type: .caseIgnorable)
    static let graphemeExtend = BuiltInUnicodeScalarSet(type: .graphemeExtend)
    static let canonicalDecomposable = BuiltInUnicodeScalarSet(type: .canonicalDecomposable)

    // MARK: Stored state

    /// The predefined set this value represents.
    var charset: SetType

    /// Right shift that converts the low 16 bits of a scalar into a byte offset
    /// inside a plane bitmap (each byte covers eight scalars).
    let bitShiftForByte = UInt16(3)

    /// Despite its name this is applied with `&`, not as a shift: it keeps the
    /// low three bits of the scalar, i.e. the bit position inside the byte.
    let bitShiftForMask = UInt16(7)

    /// Creates a set for the given predefined set type.
    init(type: SetType) {
        self.charset = type
    }

    // MARK: Lookup

    // Equivalent to __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID())
    /// Position of this set's entry in `__CFUniCharBitmapDataArray`.
    private var _bitmapTableIndex: Int {
        switch charset {
        case .lowercaseLetter: return 2
        case .uppercaseLetter: return 3
        case .canonicalDecomposable: return 5
        case .caseIgnorable: return 20
        case .graphemeExtend: return 21
        }
    }

    // CFUniCharIsMemberOf
    /// Returns `true` when `scalar` belongs to this set.
    func contains(_ scalar: Unicode.Scalar) -> Bool {
        // Bits 16...23 of the scalar value select the plane whose bitmap is consulted.
        let plane = Int((scalar.value >> 16) & 0xFF)
        return _isMemberOfBitmap(scalar, _bitmapPtrForPlane(plane))
    }

    // CFUniCharGetBitmapPtrForPlane
    /// Returns the bitmap for `plane` of this set, or `nil` when the set's table
    /// index is out of range or the set's entry has no slot for that plane.
    func _bitmapPtrForPlane(_ plane: Int) -> UnsafePointer<UInt8>? {
        let index = _bitmapTableIndex
        guard index < __CFUniCharNumberOfBitmaps else {
            return nil
        }

        // View the imported C array as a buffer of `__CFUniCharBitmapData` and copy out this set's entry.
        let entry = withUnsafePointer(to: __CFUniCharBitmapDataArray) { arrayPtr in
            arrayPtr.withMemoryRebound(to: __CFUniCharBitmapData.self, capacity: Int(__CFUniCharNumberOfBitmaps)) { entries in
                entries[index]
            }
        }

        guard plane < entry._numPlanes else {
            return nil
        }
        return entry._planes[plane]
    }

    // CFUniCharIsMemberOfBitmap
    /// Tests the bit for `scalar` in `bitmap`; a `nil` bitmap contains nothing.
    ///
    /// Only the low 16 bits of the scalar are used here — the plane has already
    /// been accounted for by the choice of `bitmap`.
    func _isMemberOfBitmap(_ scalar: Unicode.Scalar, _ bitmap: UnsafePointer<UInt8>?) -> Bool {
        guard let bytes = bitmap else { return false }

        let low16 = UInt16(truncatingIfNeeded: scalar.value) // intentionally truncated
        let byte = bytes[Int(low16 >> bitShiftForByte)]
        let bit = UInt8(1) << (low16 & bitShiftForMask)
        return (byte & bit) != 0
    }
}
|
||||
|
17
Sources/_CShims/include/CFUniCharBitmapData.h
Normal file
17
Sources/_CShims/include/CFUniCharBitmapData.h
Normal file
@ -0,0 +1,17 @@
|
||||
/*
|
||||
CFUniCharBitmapData.h
|
||||
Copyright (c) 1999-2021, Apple Inc. and the Swift project authors. All rights reserved.
|
||||
This file is generated. Don't touch this file directly.
|
||||
*/
|
||||
|
||||
#ifndef _cfunichar_bitmap_data_h
|
||||
#define _cfunichar_bitmap_data_h
|
||||
|
||||
#include "_CStdlib.h"
|
||||
|
||||
/* One entry of the built-in character-set bitmap table: a list of per-plane bitmaps. */
typedef struct {
|
||||
/* Number of entries in `_planes`; the Swift caller in this commit only indexes `_planes` with values below this. */
uint32_t _numPlanes;
|
||||
/* Array of bitmap pointers indexed by plane number. NOTE(review): individual entries are presumably allowed to be NULL (the Swift caller treats the result as optional) -- confirm against the generator. */
const uint8_t **_planes;
|
||||
} __CFUniCharBitmapData;
|
||||
|
||||
#endif /* _cfunichar_bitmap_data_h */
|
14706
Sources/_CShims/include/CFUniCharBitmapData.inc.h
Normal file
14706
Sources/_CShims/include/CFUniCharBitmapData.inc.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -130,5 +130,9 @@
|
||||
#include <uchar.h>
|
||||
#endif
|
||||
|
||||
#if __has_include(<stdint.h>)
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#endif // FOUNDATION_CSTDLIB
|
||||
|
||||
|
@ -0,0 +1,61 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This source file is part of the Swift.org open source project
|
||||
//
|
||||
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
|
||||
// Licensed under Apache License v2.0 with Runtime Library Exception
|
||||
//
|
||||
// See https://swift.org/LICENSE.txt for license information
|
||||
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if canImport(TestSupport)
|
||||
import TestSupport
|
||||
#endif
|
||||
|
||||
#if FOUNDATION_FRAMEWORK
|
||||
@testable import Foundation
|
||||
#else
|
||||
@testable import FoundationEssentials
|
||||
#endif // FOUNDATION_FRAMEWORK
|
||||
|
||||
/// Membership spot checks for `BuiltInUnicodeScalarSet`.
final class BuiltInUnicodeScalarSetTest: XCTestCase {

    /// Checks `contains(_:)` on each built-in set against hand-picked scalars,
    /// including scalars beyond U+FFFF (e.g. U+1D467, U+E0020) and at least one
    /// non-member per set.
    func testMembership() {
        // Asserts that `set.contains(scalar)` equals `isMember`; `file`/`line` default to the
        // call site so a failure is reported on the individual expectation below.
        func assertMembership(_ set: BuiltInUnicodeScalarSet, _ scalar: Unicode.Scalar, _ isMember: Bool, file: StaticString = #file, line: UInt = #line) {
            XCTAssertEqual(set.contains(scalar), isMember, file: file, line: line)
        }

        // Lowercase letters
        assertMembership(.lowercaseLetter, "a", true)
        assertMembership(.lowercaseLetter, "ô", true)
        assertMembership(.lowercaseLetter, "\u{01FB}", true)
        assertMembership(.lowercaseLetter, "\u{1FF7}", true)
        assertMembership(.lowercaseLetter, "\u{1D467}", true)
        assertMembership(.lowercaseLetter, "A", false)

        // Uppercase letters
        assertMembership(.uppercaseLetter, "A", true)
        assertMembership(.uppercaseLetter, "À", true)
        assertMembership(.uppercaseLetter, "\u{01CF}", true)
        assertMembership(.uppercaseLetter, "\u{1E5C}", true)
        assertMembership(.uppercaseLetter, "\u{1D4A9}", true)
        assertMembership(.uppercaseLetter, "a", false)

        // Case-ignorable scalars
        assertMembership(.caseIgnorable, "'", true)
        assertMembership(.caseIgnorable, "ʻ", true)
        assertMembership(.caseIgnorable, "\u{00B4}", true) // ACUTE ACCENT
        assertMembership(.caseIgnorable, "\u{10792}", true) // MODIFIER LETTER SMALL CAPITAL G
        assertMembership(.caseIgnorable, "\u{E0020}", true)
        assertMembership(.caseIgnorable, "0", false)

        // Grapheme-extending scalars
        assertMembership(.graphemeExtend, "\u{0300}", true)
        assertMembership(.graphemeExtend, "\u{0610}", true)
        assertMembership(.graphemeExtend, "\u{302A}", true) // IDEOGRAPHIC LEVEL TONE MARK
        assertMembership(.graphemeExtend, "\u{1D17B}", true) // MUSICAL SYMBOL COMBINING ACCENT
        assertMembership(.graphemeExtend, "\u{E0020}", true) // TAG SPACE
        assertMembership(.graphemeExtend, "A", false)
        assertMembership(.graphemeExtend, "~", false)
    }
}
|
Loading…
x
Reference in New Issue
Block a user