Charles Hu 60506f3af4
Renamed _CShims to _FoundationCShims (#656)
Rationale: _CShims will effectively become semi-public in the toolchain. We add the Foundation prefix to make it less generic.
2024-06-21 16:18:38 -07:00

777 lines
33 KiB
Swift

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2023 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
internal import _FoundationCShims
/// Position of an object within a binary property list's offset table (used to index `BPlistMap.objectOffsets`).
typealias BPlistObjectIndex = Int
/// The kind of object announced by the first byte ("marker") of a bplist object.
///
/// For most kinds only the high nibble of the marker identifies the type; the scanner
/// interprets the low nibble separately (as a size or count). The singleton values
/// (`null`, `false`, `true`) and `date` are only recognized when the whole byte matches.
private enum BPlistTypeMarker: UInt8 {
    case null = 0x00
    case `false` = 0x08
    case `true` = 0x09
    case int = 0x10
    case real = 0x20
    case date = 0x33
    case data = 0x40
    case asciiString = 0x50
    case utf16String = 0x60
    case uid = 0x80
    case array = 0xA0
    case set = 0xC0
    case dict = 0xD0

    /// Classifies a raw marker byte.
    ///
    /// - Parameter marker: The first byte of an encoded object.
    /// - Returns: `nil` when the byte does not correspond to any known object kind.
    init?(_ marker: UInt8) {
        let family = marker & 0xf0
        // Markers in the 0x0_ family (null/false/true) and the 0x3_ family (date) have no
        // variable low nibble, so the complete byte has to match one of the raw values.
        // Every other family is identified by its high nibble alone.
        let needsExactMatch = (family == 0x00) || (family == (Self.date.rawValue & 0xf0))
        let lookupValue = needsExactMatch ? marker : family
        guard let kind = Self(rawValue: lookupValue) else {
            return nil
        }
        self = kind
    }
}
/// A lazily-scanned view over the bytes of a binary property list.
///
/// The map stores the validated trailer and the offset of every object; individual
/// objects are scanned from the underlying buffer on demand via `loadValue(at:)`.
class BPlistMap : PlistDecodingMap {
    /// A single scanned bplist object. Containers hold object indexes rather than
    /// nested values, and strings/data hold a `Region` into the source buffer.
    internal indirect enum Value {
        case string(Region, isAscii: Bool)
        case array([BPlistObjectIndex])
        case set([BPlistObjectIndex])
        case dict([BPlistObjectIndex:BPlistObjectIndex])
        case data(Region)
        /// Bit pattern of the `Double` time interval since the reference date.
        case date(UInt64)
        case boolean(Bool)
        /// Bit pattern of the real number, together with its encoded width in bytes.
        case real(UInt64, byteCount: Int)
        case integer(UInt64, useSignedRepresentation: Bool)
        case uid
        /// A genuine bplist null object.
        case nativeNull
        /// The ASCII string "$null", which is treated as a null value.
        case sentinelNull
    }

    /// A byte range of the source buffer, expressed as an offset from its start.
    struct Region {
        let startOffset: Int
        let count: Int
    }

    @inline(__always)
    static var nullValue: Value { .nativeNull }

    private let trailer : BPlistTrailer
    let topObjectIndex : BPlistObjectIndex
    let objectOffsets : [UInt64]

    /// The source bytes, plus the pointer to our own copy of them if `copyInBuffer()` has run.
    var dataLock : LockedState<(buffer: BufferView<UInt8>, allocation: UnsafeRawPointer?)>

    init (buffer: BufferView<UInt8>, trailer: BPlistTrailer, objectOffsets: [UInt64]) {
        self.dataLock = .init(initialState: (buffer: buffer, allocation: nil))
        self.trailer = trailer
        self.topObjectIndex = BPlistObjectIndex(trailer._topObject)
        self.objectOffsets = objectOffsets
    }

    /// Replaces the borrowed buffer with a privately allocated copy. Does nothing if a
    /// copy has already been made.
    func copyInBuffer() {
        dataLock.withLock { state in
            guard state.allocation == nil else {
                return
            }

            // Allocate an additional byte to ensure we have a trailing NUL byte which is important for cases like a floating point number fragment.
            let (p, c) = state.buffer.withUnsafeRawPointer {
                pointer, capacity -> (UnsafeRawPointer, Int) in
                let raw = UnsafeMutableRawPointer.allocate(byteCount: capacity+1, alignment: 1)
                raw.copyMemory(from: pointer, byteCount: capacity)
                raw.storeBytes(of: UInt8.zero, toByteOffset: capacity, as: UInt8.self)
                return (.init(raw), capacity+1)
            }

            state = (buffer: .init(unsafeBaseAddress: p, count: c), allocation: p)
        }
    }

    /// Runs `closure` with the bytes of `region` and with the full source buffer, while
    /// holding the data lock.
    @inline(__always)
    func withBuffer<T>(
        for region: Region, perform closure: @Sendable (_ regionBytes: BufferView<UInt8>, _ fullSource: BufferView<UInt8>) throws -> T
    ) rethrows -> T {
        try dataLock.withLock {
            return try closure($0.buffer[region], $0.buffer)
        }
    }

    deinit {
        dataLock.withLock {
            if let allocatedPointer = $0.allocation {
                // The buffer must still be the one created by `copyInBuffer()`.
                precondition($0.buffer.startIndex == BufferViewIndex(rawValue: allocatedPointer))
                allocatedPointer.deallocate()
            }
        }
    }

    /// The value of the object designated as the top object by the trailer.
    var topObject : Value {
        get throws {
            try self[topObjectIndex]
        }
    }

    subscript (objectIndex: BPlistObjectIndex) -> Value {
        get throws {
            return try loadValue(at: objectIndex)
        }
    }

    /// Scans the object stored at the given index of the offset table.
    ///
    /// - Parameter idx: Index into `objectOffsets`.
    /// - Throws: `BPlistError.corruptedValue("object index")` when `idx` is outside the
    ///   offset table (including negative values), or any error thrown while scanning.
    func loadValue(at idx: BPlistObjectIndex) throws -> Value {
        // Sendable note: We do not mutate self from within this lock
        return try dataLock.withLockUnchecked { state in
            // Check both ends of the range: a negative index must be reported as
            // corruption rather than trapping in the array subscript.
            guard objectOffsets.indices.contains(idx) else {
                throw BPlistError.corruptedValue("object index")
            }
            let scanner = BPlistScanner(buffer: state.buffer, trailer: trailer)
            return try scanner.scanObject(at: objectOffsets[idx])
        }
    }

    @inline(__always)
    func value(from reference: BPlistObjectIndex) throws -> Value {
        try loadValue(at: reference)
    }

    /// Iterates over the object indexes of an array or set.
    struct ArrayIterator: PlistArrayIterator {
        var iter: [BPlistObjectIndex].Iterator

        @inline(__always)
        mutating func next() -> BPlistObjectIndex? {
            iter.next()
        }
    }

    /// Iterates over the key/value object-index pairs of a dictionary.
    struct DictionaryIterator: PlistDictionaryIterator {
        var iter: [BPlistObjectIndex:BPlistObjectIndex].Iterator

        @inline(__always)
        mutating func next() -> (key: BPlistObjectIndex, value: BPlistObjectIndex)? {
            iter.next()
        }
    }
}
extension BPlistMap.Value {
    /// `true` for both the native null object and the "$null" sentinel string.
    var isNull : Bool {
        if case .nativeNull = self { return true }
        if case .sentinelNull = self { return true }
        return false
    }

    /// Converts this value to the integer type `T`.
    ///
    /// Real values are accepted when they convert exactly. Integer values are interpreted
    /// as signed or unsigned 64-bit quantities according to their stored representation flag.
    ///
    /// - Throws: A type-mismatch error for non-numeric values, or a data-corrupted error
    ///   when the number cannot be represented exactly in `T`.
    func integerValue<T: BinaryInteger>(in map: BPlistMap, as type: T.Type, for codingPathNode: _CodingPathNode, _ additionalKey: (some CodingKey)? = _CodingKey?.none) throws -> T {
        switch self {
        case .real:
            let double = try self.realValue(in: map, as: Double.self, for: codingPathNode, additionalKey)
            if let integer = T(exactly: double) {
                return integer
            }
            throw DecodingError._dataCorrupted("Property list number <\(double)> does not fit in \(type).", for: codingPathNode, additionalKey)

        case .integer(let bits, let isSigned):
            if isSigned {
                // Reinterpret the raw 64 bits as a two's-complement signed value.
                let signed = Int64(bitPattern: bits)
                if let converted = T(exactly: signed) {
                    return converted
                }
                throw DecodingError._dataCorrupted("Parsed property list number <\(signed)> does not fit in \(type).", for: codingPathNode, additionalKey)
            }
            if let converted = T(exactly: bits) {
                return converted
            }
            throw DecodingError._dataCorrupted("Parsed property list number <\(bits)> does not fit in \(type).", for: codingPathNode, additionalKey)

        default:
            throw DecodingError._typeMismatch(at: codingPathNode.path(byAppending: additionalKey), expectation: type, reality: self)
        }
    }

    /// Converts this value to the floating-point type `T`.
    ///
    /// Integer values are converted from their unsigned interpretation when possible and
    /// from their signed interpretation otherwise. Real values must convert exactly,
    /// except NaN, which maps to `T.nan`.
    ///
    /// - Throws: A type-mismatch error for non-numeric values, or a data-corrupted error
    ///   when a real number cannot be represented exactly in `T`.
    func realValue<T: BinaryFloatingPoint>(in map: BPlistMap, as type: T.Type, for codingPathNode: _CodingPathNode, _ additionalKey: (some CodingKey)? = _CodingKey?.none) throws -> T {
        switch self {
        case .integer:
            if let unsigned = try? self.integerValue(in: map, as: UInt64.self, for: codingPathNode, additionalKey) {
                return T(unsigned)
            }
            let signed = try self.integerValue(in: map, as: Int64.self, for: codingPathNode, additionalKey)
            return T(signed)

        case .real(let bits, let byteCount):
            switch byteCount {
            case MemoryLayout<Float>.size:
                // We only read 4 bytes, so this coercion should never fail.
                let float = Float(bitPattern: UInt32(bits))
                if float.isNaN {
                    return T.nan // T(exactly: X.nan) always returns nil
                }
                if let result = T(exactly: float) {
                    return result
                }
                throw DecodingError._dataCorrupted("Property list number <\(float)> does not fit in \(type).", for: codingPathNode, additionalKey)

            case MemoryLayout<Double>.size:
                let double = Double(bitPattern: bits)
                if double.isNaN {
                    return T.nan // T(exactly: X.nan) always returns nil
                }
                if let result = T(exactly: double) {
                    return result
                }
                throw DecodingError._dataCorrupted("Property list number <\(double)> does not fit in \(type).", for: codingPathNode, additionalKey)

            default:
                fatalError("Impossible bplist real byte count: \(byteCount)")
            }

        default:
            throw DecodingError._typeMismatch(at: codingPathNode.path(byAppending: additionalKey), expectation: type, reality: self)
        }
    }

    /// Copies the bytes of a `.data` value out of the map's buffer.
    ///
    /// - Throws: A type-mismatch error when this value is not `.data`.
    func dataValue(in map: BPlistMap, for codingPathNode: _CodingPathNode, _ additionalKey: (some CodingKey)? = _CodingKey?.none) throws -> Data {
        if case .data(let region) = self {
            return map.withBuffer(for: region) { regionBytes, _ in
                Data(bufferView: regionBytes)
            }
        }
        throw DecodingError._typeMismatch(at: codingPathNode.path(byAppending: additionalKey), expectation: Data.self, reality: self)
    }

    /// Interprets a `.date` value's bits as a `Double` interval since the reference date.
    ///
    /// - Throws: A type-mismatch error when this value is not `.date`.
    func dateValue(in map: BPlistMap, for codingPathNode: _CodingPathNode, _ additionalKey: (some CodingKey)? = _CodingKey?.none) throws -> Date {
        if case .date(let bits) = self {
            let interval = Double(bitPattern: bits)
            return Date(timeIntervalSinceReferenceDate: interval)
        }
        throw DecodingError._typeMismatch(at: codingPathNode.path(byAppending: additionalKey), expectation: Date.self, reality: self)
    }
}
extension BPlistMap.Value: DecodingErrorValueTypeDebugStringConvertible {
    /// A short noun phrase naming the kind of value, for use in decoding error messages.
    var debugDataTypeDescription: String {
        let phrase: String
        switch self {
        // Scalars
        case .boolean:
            phrase = "a boolean"
        case .integer:
            phrase = "an integer"
        case .real:
            phrase = "a real number"
        case .date:
            phrase = "a date"
        case .uid:
            phrase = "a uid"
        // Byte-backed values
        case .string:
            phrase = "a string"
        case .data:
            phrase = "a data value"
        // Containers
        case .array:
            phrase = "an array"
        case .set:
            phrase = "a set"
        case .dict:
            phrase = "a dictionary"
        // Nulls
        case .nativeNull:
            phrase = "a null value"
        case .sentinelNull:
            phrase = "the string \"$null\""
        }
        return phrase
    }
}
fileprivate extension BufferReader {
    /// Reads a big-endian unsigned integer of `size` bytes starting at `idx`.
    ///
    /// No bounds checking is performed here; the caller must already have verified that
    /// `size` bytes are available. For sizes above 8 only the last 8 bytes are read.
    @inline(__always)
    func getBoundsCheckedSizedInt(at idx: BufferView<UInt8>.Index, size: Int) -> UInt64 {
        // The byte `offset` positions past `idx`, widened to 64 bits.
        func byte(_ offset: Int) -> UInt64 {
            UInt64(bytes[unchecked: idx.advanced(by: offset)])
        }

        switch size {
        case 1:
            return byte(0)

        case 2:
            let high = byte(0) << 8
            return high | byte(1)

        case 4:
            var result = byte(0) << 24
            result |= byte(1) << 16
            result |= byte(2) << 8
            result |= byte(3)
            return result

        case 8:
            var result = byte(0) << 56
            result |= byte(1) << 48
            result |= byte(2) << 40
            result |= byte(3) << 32
            result |= byte(4) << 24
            result |= byte(5) << 16
            result |= byte(6) << 8
            result |= byte(7)
            return result

        case 0, 3, 5, 6, 7:
            // Compatibility with existing archives which could have non-power-of-2 size.
            var result : UInt64 = 0
            for offset in 0 ..< size {
                result = (result << 8) + byte(offset)
            }
            return result

        default:
            // Compatibility with existing archives, which could include > 8 byte values, for which we only read the last 8 bytes.
            let first = size - 8
            var result = byte(first) << 56
            result |= byte(first + 1) << 48
            result |= byte(first + 2) << 40
            result |= byte(first + 3) << 32
            result |= byte(first + 4) << 24
            result |= byte(first + 5) << 16
            result |= byte(first + 6) << 8
            result |= byte(first + 7)
            return result
        }
    }

    /// Bounds-checked variant of `getBoundsCheckedSizedInt(at:size:)`.
    ///
    /// - Returns: `nil` when fewer than `size` bytes lie between `idx` and `endIndex`.
    @inline(__always)
    func getSizedInt(at idx: BufferView<UInt8>.Index, endIndex: BufferView<UInt8>.Index, size: Int) -> UInt64? {
        let available = idx.distance(to: endIndex)
        if size > available {
            return nil
        }
        return getBoundsCheckedSizedInt(at: idx, size: size)
    }

    /// Reads a complete integer object (marker byte plus payload) at `idx`, moving `idx`
    /// past whatever was consumed.
    ///
    /// - Parameters:
    ///   - idx: Position of the integer's marker byte; updated in place.
    ///   - objectRangeEnd: First index that no longer belongs to object data.
    ///   - type: Name of the enclosing value kind, used in the thrown error.
    /// - Throws: `BPlistError.corruptedValue(type)` when no integer can be read.
    func readInt(updatingIndex idx: inout BufferView<UInt8>.Index, objectRangeEnd: BufferView<UInt8>.Index, for type: String) throws -> UInt64 {
        if idx >= objectRangeEnd {
            throw BPlistError.corruptedValue(type)
        }

        let marker = bytes[unchecked: idx]
        bytes.formIndex(after: &idx)
        guard let kind = BPlistTypeMarker(marker), kind == .int else {
            throw BPlistError.corruptedValue(type)
        }

        // The low nibble holds log2 of the payload width.
        let width: Int = 1 << Int(marker & 0x0f)

        // integers are not required to be in the most compact possible representation, but only the last 64 bits are significant currently
        guard let value = getSizedInt(at: idx, endIndex: objectRangeEnd, size: width) else {
            throw BPlistError.corruptedValue(type)
        }
        bytes.formIndex(&idx, offsetBy: width)
        return value
    }
}
/// Adds `a` and `b` as one step of a chain of overflow-tracked additions.
///
/// - Parameter overflow: If already `true` on entry, the addition is skipped and 0 is
///   returned. Otherwise it is set to whether this addition overflowed.
/// - Returns: The (possibly wrapped) sum, or 0 when an earlier step had already overflowed.
private func addCheckingForOverflow(_ a: UInt64, _ b: UInt64, overflow : inout Bool) -> UInt64 {
    guard !overflow else {
        return 0
    }
    let sum = a.addingReportingOverflow(b)
    overflow = sum.overflow
    return sum.partialValue
}
/// Validates the top-level structure of a binary property list and scans individual
/// objects out of its object table.
internal struct BPlistScanner {
    var reader : BufferReader
    let baseIdx : BufferView<UInt8>.Index
    let trailer : BPlistTrailer

    /// Length of the "bplistXX" header at the start of the data.
    private static let bplistXXLen = 8

    /// Returns whether `buff` is large enough to be a bplist and starts with "bplist0".
    static func hasBPlistMagic(in buff: BufferView<UInt8>) -> Bool {
        guard buff.count >= MemoryLayout<BPlistTrailer>.size + bplistXXLen + 1 else {
            return false
        }
        let reader = BufferReader(bytes: buff)
        return reader.string(at: buff.startIndex, matches: "bplist0")
    }

    /// Reads and validates the trailer at the end of `buff`.
    ///
    /// - Returns: The trailer converted to host byte order, or `nil` when the header,
    ///   the trailer, or the top object's offset fails validation.
    static func parseTopLevelInfo(from buff: BufferView<UInt8>) -> BPlistTrailer? {
        guard hasBPlistMagic(in: buff) else {
            return nil
        }

        let trailer = buff.withUnsafePointer { buffPtr, buffCount in
            var trailer = BPlistTrailer()
            let trailerBegin = buffPtr + buffCount - MemoryLayout<BPlistTrailer>.size
            _ = withUnsafeMutableBytes(of: &trailer) {
                memmove($0.baseAddress!, trailerBegin, MemoryLayout<BPlistTrailer>.size)
            }

            // The bplist format is big endian by definition. On a little-endian machine, the 64-bit values need to be swapped. X.bigEndian is equivalent to "convert big- to host-endianness".
            trailer._numObjects = trailer._numObjects.bigEndian
            trailer._topObject = trailer._topObject.bigEndian
            trailer._offsetTableOffset = trailer._offsetTableOffset.bigEndian
            return trailer
        }

        // Don't overflow on the number of objects or offset of the table
        guard trailer._numObjects <= LONG_MAX, trailer._offsetTableOffset <= LONG_MAX else {
            return nil
        }

        // Must be a minimum of 1 object
        guard trailer._numObjects >= 1 else {
            return nil
        }

        // The ref to the top object must be a valid object index, i.e. in the range 0 ..< number of objects
        guard trailer._numObjects > trailer._topObject else {
            return nil
        }

        // The offset table must be after at least 9 bytes of other data ('bplist??' + 1 byte of object table data).
        guard trailer._offsetTableOffset >= 9 else {
            return nil
        }

        // The trailer must point to a value before itself in the data.
        guard buff.count - MemoryLayout<BPlistTrailer>.size > trailer._offsetTableOffset else {
            return nil
        }

        // Minimum of 1 byte for the size of integers and references in the data
        guard trailer._offsetIntSize >= 1, trailer._objectRefSize >= 1 else {
            return nil
        }

        // The total size of the offset table (number of objects * size of each int in the table) must not overflow
        let offsetTableSize : UInt64
        var overflow = false
        (offsetTableSize, overflow) = trailer._numObjects.multipliedReportingOverflow(by: UInt64(trailer._offsetIntSize))
        guard !overflow else {
            return nil
        }

        // The offset table must have at least 1 entry
        guard offsetTableSize >= 1 else {
            return nil
        }

        // Make sure the size of the offset table and data sections do not overflow
        let objectDataSize = trailer._offsetTableOffset - 8
        var tmpSum = addCheckingForOverflow(8, objectDataSize, overflow: &overflow)
        tmpSum = addCheckingForOverflow(tmpSum, offsetTableSize, overflow: &overflow)
        tmpSum = addCheckingForOverflow(tmpSum, UInt64(MemoryLayout<BPlistTrailer>.size), overflow: &overflow)
        guard !overflow else {
            return nil
        }

        // The total size of the data must equal header + object data + offset table + trailer
        guard buff.count == tmpSum else {
            return nil
        }

        // The object refs must be the right size to point into the offset table. That is, if the count of objects is 260, but only 1 byte is used to store references (max value 255), something is wrong.
        if trailer._objectRefSize < 8 && 1<<(8 * trailer._objectRefSize) <= trailer._numObjects {
            return nil
        }

        // The integers used for pointers in the offset table must be able to reach as far as the start of the offset table.
        if trailer._offsetIntSize < 8 && 1<<(8 * trailer._offsetIntSize) <= trailer._offsetTableOffset {
            return nil
        }

        // We're deferring the validation of all the entries of the offsetTable to scanBinaryPropertyList() time. However, we will still check that the top object offset is valid, as has been done in __CFBinaryPlistGetTopLevelInfo.
        var (topObjectOffsetOffset, topObjectOverflow) = Int(trailer._topObject).multipliedReportingOverflow(by: Int(trailer._offsetIntSize))
        guard !topObjectOverflow else {
            return nil
        }
        (topObjectOffsetOffset, topObjectOverflow) = Int(trailer._offsetTableOffset).addingReportingOverflow(topObjectOffsetOffset)
        guard !topObjectOverflow else {
            return nil
        }
        guard buff.count > topObjectOffsetOffset else {
            return nil
        }

        let reader = BufferReader(bytes: buff)
        let topObjectOffsetIdx = reader.index(offset: topObjectOffsetOffset)
        guard let topObjectOffset = reader.getSizedInt(at: topObjectOffsetIdx, endIndex: buff.endIndex, size: Int(trailer._offsetIntSize)) else {
            return nil
        }

        // Must fall somewhere after bplistXX and before the beginning of the offset table.
        guard topObjectOffset >= 8, topObjectOffset < trailer._offsetTableOffset else {
            return nil
        }

        return trailer
    }

    init(buffer: BufferView<UInt8>, trailer: BPlistTrailer) {
        self.trailer = trailer
        self.reader = BufferReader(bytes: buffer)
        self.baseIdx = buffer.startIndex
    }

    /// Validates `buffer` as a binary property list and builds a map over it.
    ///
    /// - Throws: `BPlistError.corruptTopLevelInfo` when the trailer is invalid or any
    ///   offset-table entry points at or beyond the offset table.
    static func scanBinaryPropertyList(from buffer: BufferView<UInt8>) throws -> BPlistMap {
        guard let trailer = Self.parseTopLevelInfo(from: buffer) else {
            throw BPlistError.corruptTopLevelInfo
        }

        var objectOffsets = [UInt64]()
        let initialCapacity = min(Int(trailer._numObjects), 1024 * 256) // Enforce an arbitrary ceiling for the size we'll attempt to reserve in this array. Untrusted input shouldn't cause us to allocate insane amounts of memory so easily.
        objectOffsets.reserveCapacity(initialCapacity)

        // Ensure that all object offsets in the archive are valid. This enables us to access the buffer later without redundant bounds checking.
        let reader = BufferReader(bytes: buffer)
        var objectTableCursor = buffer.startIndex.advanced(by: Int(trailer._offsetTableOffset))
        let endIdx = buffer.endIndex
        let maxOffset = trailer._offsetTableOffset - 1
        for _ in 0 ..< trailer._numObjects {
            guard let off = reader.getSizedInt(at: objectTableCursor, endIndex: endIdx, size: Int(trailer._offsetIntSize)), off <= maxOffset else {
                throw BPlistError.corruptTopLevelInfo
            }
            objectOffsets.append(off)
            buffer.formIndex(&objectTableCursor, offsetBy: Int(trailer._offsetIntSize))
        }

        return .init(buffer: buffer, trailer: trailer, objectOffsets: objectOffsets)
    }

    /// Scans the object whose marker byte is at `offset` from the start of the buffer.
    ///
    /// `offset` is expected to be one of the offsets validated by
    /// `scanBinaryPropertyList(from:)`; the marker byte itself is read without a bounds check.
    ///
    /// - Throws: `BPlistError.invalidMarker` for an unknown marker byte, or
    ///   `BPlistError.corruptedValue` when the object's payload does not fit in the object table.
    func scanObject(at offset: UInt64) throws -> BPlistMap.Value {
        let idx = reader.index(offset: try Int(bplistSafe: offset))
        let rawMarker = reader.char(at: idx)
        let objectRangeEndIdx = baseIdx.advanced(by: Int(trailer._offsetTableOffset))

        guard let typeMarker = BPlistTypeMarker(rawMarker) else {
            throw BPlistError.invalidMarker
        }

        // Exhaustive on purpose: adding a new marker kind must be a compile-time error here.
        switch typeMarker {
        case .null:
            return .nativeNull
        case .false:
            return .boolean(false)
        case .true:
            return .boolean(true)
        case .int:
            return try scanInteger(rawTypeMarker: rawMarker, index: idx, objectRangeEndIndex: objectRangeEndIdx)
        case .real:
            return try scanReal(rawTypeMarker: rawMarker, index: idx, objectRangeEndIndex: objectRangeEndIdx)
        case .date:
            return try scanDate(index: idx, objectRangeEndIndex: objectRangeEndIdx)
        case .data:
            return try scanData(rawTypeMarker: rawMarker, index: idx, objectRangeEndIndex: objectRangeEndIdx)
        case .asciiString:
            return try scanASCIIString(rawTypeMarker: rawMarker, index: idx, objectRangeEndIndex: objectRangeEndIdx)
        case .utf16String:
            return try scanUTF16BEString(rawTypeMarker: rawMarker, index: idx, objectRangeEndIndex: objectRangeEndIdx)
        case .uid:
            // NSKeyedArchiver UIDs are unused by PropertyListDecoder, so we don't really need to bother parsing their data.
            return .uid
        case .array, .set:
            return try scanArrayOrSet(typeMarker: typeMarker, rawTypeMarker: rawMarker, index: idx, objectRangeEndIndex: objectRangeEndIdx)
        case .dict:
            return try scanDictionary(rawTypeMarker: rawMarker, index: idx, objectRangeEndIndex: objectRangeEndIdx)
        }
    }

    /// Scans an integer whose byte width is `1 << (low nibble of the marker)`, up to 16 bytes.
    private func scanInteger(rawTypeMarker: UInt8, index idx: BufferViewIndex<UInt8>, objectRangeEndIndex: BufferViewIndex<UInt8>) throws -> BPlistMap.Value {
        let integerSize = 1 << (rawTypeMarker & 0x0f)
        guard integerSize <= 16 else {
            throw BPlistError.invalidMarker
        }

        // Anything over 8 bytes is definitely supposed to be interpreted as an unsigned value. However, only the least significant 8 bytes are respected. On the encoding side, for 64-bit unsigned integers, the top 8 bytes are always all zeroes. This is how we differentiate UInt64.max and Int64(-1)
        let dataStartIdx = idx.advanced(by: 1)
        guard let integer = reader.getSizedInt(at: dataStartIdx, endIndex: objectRangeEndIndex, size: integerSize) else {
            throw BPlistError.corruptedValue("integer")
        }
        return .integer(integer, useSignedRepresentation: integerSize <= MemoryLayout<UInt64>.size)
    }

    /// Scans a 4- or 8-byte real number, returning its raw bit pattern.
    private func scanReal(rawTypeMarker: UInt8, index idx: BufferViewIndex<UInt8>, objectRangeEndIndex: BufferViewIndex<UInt8>) throws -> BPlistMap.Value {
        let dataStartIdx = idx.advanced(by: 1)
        switch rawTypeMarker & 0xf {
        case 2: // 4 byte real
            guard let integer = reader.getSizedInt(at: dataStartIdx, endIndex: objectRangeEndIndex, size: 4) else {
                throw BPlistError.corruptedValue("real")
            }
            return .real(integer, byteCount: 4)
        case 3: // 8 byte real
            guard let integer = reader.getSizedInt(at: dataStartIdx, endIndex: objectRangeEndIndex, size: 8) else {
                throw BPlistError.corruptedValue("real")
            }
            return .real(integer, byteCount: 8)
        default:
            throw BPlistError.invalidMarker
        }
    }

    /// Scans the 8-byte payload of a date, returning its raw bit pattern.
    private func scanDate(index idx: BufferViewIndex<UInt8>, objectRangeEndIndex: BufferViewIndex<UInt8>) throws -> BPlistMap.Value {
        let dataStartIdx = idx.advanced(by: 1)
        guard let integer = reader.getSizedInt(at: dataStartIdx, endIndex: objectRangeEndIndex, size: 8) else {
            throw BPlistError.corruptedValue("date")
        }
        return .date(integer)
    }

    /// Scans a data object, returning the region occupied by its bytes.
    private func scanData(rawTypeMarker: UInt8, index idx: BufferViewIndex<UInt8>, objectRangeEndIndex: BufferViewIndex<UInt8>) throws -> BPlistMap.Value {
        var count = UInt64(rawTypeMarker & 0x0f)
        var dataStartIdx = idx.advanced(by: 1)
        // A low nibble of 0xf means the real count follows as an integer object.
        if count == 0xf {
            count = try reader.readInt(updatingIndex: &dataStartIdx, objectRangeEnd: objectRangeEndIndex, for: "data")
        }

        guard dataStartIdx.distance(to: objectRangeEndIndex) >= count else {
            throw BPlistError.corruptedValue("data")
        }

        return .data(.init(startOffset: baseIdx.distance(to: dataStartIdx), count: Int(count)))
    }

    /// Scans an ASCII string, returning its byte region, or `.sentinelNull` for "$null".
    private func scanASCIIString(rawTypeMarker: UInt8, index idx: BufferViewIndex<UInt8>, objectRangeEndIndex: BufferViewIndex<UInt8>) throws -> BPlistMap.Value {
        var count = UInt64(rawTypeMarker & 0x0f)
        var dataStartIdx = idx.advanced(by: 1)
        // A low nibble of 0xf means the real count follows as an integer object.
        if count == 0xf {
            count = try reader.readInt(updatingIndex: &dataStartIdx, objectRangeEnd: objectRangeEndIndex, for: "ASCII string")
        }

        guard dataStartIdx.distance(to: objectRangeEndIndex) >= count else {
            throw BPlistError.corruptedValue("ASCII string")
        }

        // Yes, this means that an encoded string value of "$null" is never surfaced as a string by this scanner. It is always reported as a (sentinel) null value.
        if count == _plistNull.utf8CodeUnitCount, reader.char(at: dataStartIdx) == UInt8(ascii: "$"), reader.string(at: dataStartIdx, matches: _plistNull) {
            return .sentinelNull
        }

        return .string(.init(startOffset: baseIdx.distance(to: dataStartIdx), count: Int(count)), isAscii: true)
    }

    /// Scans a big-endian UTF-16 string, returning the region occupied by its bytes.
    ///
    /// The encoded count is in 16-bit code units, so the region is twice that many bytes.
    private func scanUTF16BEString(rawTypeMarker: UInt8, index idx: BufferViewIndex<UInt8>, objectRangeEndIndex: BufferViewIndex<UInt8>) throws -> BPlistMap.Value {
        var count = UInt64(rawTypeMarker & 0x0f)
        var dataStartIdx = idx.advanced(by: 1)
        // A low nibble of 0xf means the real count follows as an integer object.
        if count == 0xf {
            count = try reader.readInt(updatingIndex: &dataStartIdx, objectRangeEnd: objectRangeEndIndex, for: "UTF16 string")
        }

        let (byteCount, overflow) = count.multipliedReportingOverflow(by: 2) // 2 bytes per character

        // Validate the byte count, not the character count: the region handed back below
        // spans `byteCount` bytes and must end before the offset table.
        guard !overflow, dataStartIdx.distance(to: objectRangeEndIndex) >= byteCount else {
            throw BPlistError.corruptedValue("UTF16 string")
        }

        // We never emit "$null" as a UTF16 value in bplist, so we shouldn't need to try to detect it here.
        return .string(.init(startOffset: baseIdx.distance(to: dataStartIdx), count: Int(byteCount)), isAscii: false)
    }

    /// Scans an array or set, returning the object indexes of its elements.
    private func scanArrayOrSet(typeMarker: BPlistTypeMarker, rawTypeMarker: UInt8, index idx: BufferViewIndex<UInt8>, objectRangeEndIndex: BufferViewIndex<UInt8>) throws -> BPlistMap.Value {
        var count = UInt64(rawTypeMarker & 0x0f)
        var dataStartIdx = idx.advanced(by: 1)
        // A low nibble of 0xf means the real count follows as an integer object.
        if count == 0xf {
            count = try reader.readInt(updatingIndex: &dataStartIdx, objectRangeEnd: objectRangeEndIndex, for: "array")
        }

        let refSize = Int(trailer._objectRefSize)
        let (byteCount, overflow) = count.multipliedReportingOverflow(by: UInt64(refSize))

        // `count` and `byteCount` come from untrusted input and may exceed Int.max, so
        // never convert them with the trapping `Int(_:)` before they have been validated.
        guard !overflow,
              let elementCount = Int(exactly: count),
              dataStartIdx.distance(to: objectRangeEndIndex) >= byteCount else {
            throw BPlistError.corruptedValue("array")
        }

        var indexCursor = dataStartIdx
        var arr = [BPlistObjectIndex]()
        // Enforce an arbitrary ceiling for the size we'll attempt to reserve in this array. Untrusted input shouldn't cause us to allocate insane amounts of memory so easily.
        arr.reserveCapacity(min(elementCount, 1024 * 256))

        for _ in 0..<elementCount {
            arr.append(try Int(bplistSafe: reader.getBoundsCheckedSizedInt(at: indexCursor, size: refSize)))
            reader.bytes.formIndex(&indexCursor, offsetBy: refSize)
        }

        return (typeMarker == .array) ? .array(arr) : .set(arr)
    }

    /// Scans a dictionary, returning a mapping from key object index to value object index.
    ///
    /// All key references are stored first, followed by all value references.
    private func scanDictionary(rawTypeMarker: UInt8, index idx: BufferViewIndex<UInt8>, objectRangeEndIndex: BufferViewIndex<UInt8>) throws -> BPlistMap.Value {
        var count = UInt64(rawTypeMarker & 0x0f)
        var dataStartIdx = idx.advanced(by: 1)
        // A low nibble of 0xf means the real count follows as an integer object.
        if count == 0xf {
            count = try reader.readInt(updatingIndex: &dataStartIdx, objectRangeEnd: objectRangeEndIndex, for: "dictionary")
        }

        let (keyPlusObjectCount, overflow) = count.multipliedReportingOverflow(by: 2) // key + object per "count"
        guard !overflow else {
            throw BPlistError.corruptedValue("dictionary")
        }

        let refSize = Int(trailer._objectRefSize)
        let (byteCount, overflow2) = keyPlusObjectCount.multipliedReportingOverflow(by: UInt64(refSize))

        // `count` and `byteCount` come from untrusted input and may exceed Int.max, so
        // never convert them with the trapping `Int(_:)` before they have been validated.
        guard !overflow2,
              let entryCount = Int(exactly: count),
              dataStartIdx.distance(to: objectRangeEndIndex) >= byteCount else {
            throw BPlistError.corruptedValue("dictionary")
        }

        var dict = [BPlistObjectIndex:BPlistObjectIndex](minimumCapacity: entryCount)
        // The value reference for a key sits `entryCount` references after the key reference.
        let offsetFromKeyToObject = entryCount * refSize

        var keyIndexCursor = dataStartIdx
        for _ in 0..<entryCount {
            let keyIdx = try Int(bplistSafe: reader.getBoundsCheckedSizedInt(at: keyIndexCursor, size: refSize))
            let valIdx = try Int(bplistSafe: reader.getBoundsCheckedSizedInt(at: keyIndexCursor.advanced(by: offsetFromKeyToObject), size: refSize))
            dict[keyIdx] = valIdx
            reader.bytes.formIndex(&keyIndexCursor, offsetBy: refSize)
        }

        return .dict(dict)
    }
}
extension Int {
    /// Converts a fixed-width integer read from a bplist to `Int`.
    ///
    /// - Throws: `BPlistError.corruptedValue("integer")` when `val` is not exactly
    ///   representable as an `Int`.
    @inline(__always)
    init (bplistSafe val: some FixedWidthInteger) throws {
        if let exact = Int(exactly: val) {
            self = exact
        } else {
            throw BPlistError.corruptedValue("integer")
        }
    }
}
/// Errors produced while validating or scanning a binary property list.
enum BPlistError: Swift.Error, Equatable {
    /// An object's marker byte does not denote a known or valid object kind.
    case invalidMarker
    /// A value of the named kind could not be read.
    case corruptedValue(String)
    /// The header, trailer, or offset table failed validation.
    case corruptTopLevelInfo

    /// Short human-readable description of the failure.
    var debugDescription : String {
        let text: String
        switch self {
        case .corruptTopLevelInfo:
            text = "Corrupt top-level info"
        case .corruptedValue(let kind):
            text = "Corrupt \(kind) value"
        case .invalidMarker:
            text = "Invalid marker"
        }
        return text
    }

    /// This error as a `propertyListReadCorrupt` Cocoa error carrying `debugDescription`
    /// under `NSDebugDescriptionErrorKey`.
    var cocoaError: CocoaError {
        return CocoaError(.propertyListReadCorrupt, userInfo: [
            NSDebugDescriptionErrorKey : debugDescription
        ])
    }
}
extension BufferView<UInt8> {
    // TODO: Here temporarily until it can be moved to CodableUtilities.swift on the FoundationPreview side

    /// The sub-view covering `region`, obtained through `slice(from:count:)`.
    internal subscript(region: BPlistMap.Region) -> BufferView {
        get {
            return slice(from: region.startOffset, count: region.count)
        }
    }

    /// The sub-view covering `region`, obtained through `uncheckedSlice(from:count:)`.
    /// NOTE(review): presumably the caller must guarantee the region is in bounds — confirm against `uncheckedSlice`.
    internal subscript(unchecked region: BPlistMap.Region) -> BufferView {
        get {
            return uncheckedSlice(from: region.startOffset, count: region.count)
        }
    }
}