Tony Parker 39344d7f61
Reduce number of read calls when not asking for progress reporting. (#623)
* Reduce number of read calls when not asking for progress reporting.

Also, when asking for progress reporting, cap the number of updates at approximately 100 instead of fileSize/4k.

* Add readUntilEOF option to file reading primitive and set it to true by default

* Address review comments - change readUntilEOF to readUntilLength
2024-05-23 14:36:07 -07:00

477 lines
20 KiB
Swift

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#if FOUNDATION_FRAMEWORK
internal import _ForSwiftFoundation
#endif
internal import _CShims
#if canImport(Darwin)
import Darwin
#elseif canImport(Glibc)
import Glibc
#elseif os(Windows)
import CRT
import WinSDK
#endif
/// Platform-neutral shim over `fgetxattr`.
///
/// The Darwin signature (with `position` and `options`) is used as the common interface.
/// - Returns: Whatever the platform's `fgetxattr` returns, or `-1` on platforms where neither Darwin nor Glibc can be imported.
func _fgetxattr(_ fd: Int32, _ name: UnsafePointer<CChar>!, _ value: UnsafeMutableRawPointer!, _ size: Int, _ position: UInt32, _ options: Int32) -> Int {
    let attributeLength: Int
#if canImport(Darwin)
    attributeLength = fgetxattr(fd, name, value, size, position, options)
#elseif canImport(Glibc)
    // The Glibc variant is called without `position` and `options`; both are ignored here.
    attributeLength = fgetxattr(fd, name, value, size)
#else
    // No extended attribute support: always report failure.
    attributeLength = -1
#endif
    return attributeLength
}
/// Reads the extended attributes named in `attrsToRead` from the open file descriptor `fd`.
///
/// Reading is best effort: an attribute that is missing, or that cannot be read for any other reason, is simply left out of the result.
/// - Parameters:
///   - fd: An open file descriptor.
///   - attrsToRead: The names of the extended attributes to fetch.
/// - Returns: A dictionary from attribute name to attribute value for every attribute that was read successfully.
private func readExtendedAttributesFromFileDescriptor(_ fd: Int32, attrsToRead: [String]) -> [String : Data] {
#if !NO_FILESYSTEM
    var output: [String : Data] = [:]
    for key in attrsToRead {
        key.withCString { keyStr in
            // Most attributes are small, so try a stack-friendly temporary buffer first.
            let maxXAttrLength = 1000
            withUnsafeTemporaryAllocation(of: CUnsignedChar.self, capacity: maxXAttrLength) { buf in
                let result = _fgetxattr(fd, keyStr, buf.baseAddress, maxXAttrLength, 0, 0)
                if result != -1 {
                    // Force unwrap buffer - if we do not have a base address, assert is appropriate.
                    output[key] = Data(bytes: buf.baseAddress!, count: result)
                    return
                }

                // -1 means the read failed (for example, no such attribute). Only ERANGE, which indicates that the buffer was too small, is worth a retry.
                guard errno == ERANGE else {
                    return
                }

                // Get its needed size (passing nil buffer)
                let neededSize = _fgetxattr(fd, keyStr, nil, 0, 0, 0)
                guard neededSize > 0 else {
                    // The size query itself can fail (the attribute may have disappeared since the first call). Never hand a negative size to malloc.
                    if neededSize == 0 {
                        // The attribute is now empty.
                        output[key] = Data()
                    }
                    return
                }
                guard let fullBuffer = malloc(neededSize) else {
                    // Out of memory; skip this attribute rather than crash.
                    return
                }
                if _fgetxattr(fd, keyStr, fullBuffer, neededSize, 0, 0) == neededSize {
                    // Ownership of the buffer moves to the Data, which frees it.
                    output[key] = Data(bytesNoCopy: fullBuffer, count: neededSize, deallocator: .free)
                } else {
                    // If still an error, then give up
                    free(fullBuffer)
                }
            }
        }
    }
    return output
#else
    // No extended attributes on this platform
    return [:]
#endif
}
/// Decides whether the contents of `fd` should be memory mapped instead of read into a heap buffer.
///
/// - Parameters:
///   - fd: The open file descriptor.
///   - path: The path the descriptor was opened from; used for the one check that is path based.
///   - options: The reading options supplied by the caller.
/// - Returns: `true` when `.alwaysMapped` is set, or when `.mappedIfSafe` is set and every safety check passes. Any failed or unavailable check yields `false`.
private func shouldMapFileDescriptor(_ fd: Int32, path: PathOrURL, options: Data.ReadingOptions) -> Bool {
    if options.contains(.alwaysMapped) {
        return true
    }

    guard options.contains(.mappedIfSafe) else {
        return false
    }

#if FOUNDATION_FRAMEWORK && !NO_FILESYSTEM
    // Failures from here out are non-fatal; they just mean "do not map".

    // The file's protection class must not be == 'Class A'.
    // A negative value means the class could not be determined.
    let protectionClass = fcntl(fd, F_GETPROTECTIONCLASS)
    let protectionClassIsSafeToMap = protectionClass >= 0 && protectionClass != 1 /* Class A */
    guard protectionClassIsSafeToMap else {
        return false
    }

    // The file must live on a local (== not network), non-removable volume.
    var fileSystemInfo = statfs()
    guard fstatfs(fd, &fileSystemInfo) == 0 else {
        return false
    }
    let isLocal = fileSystemInfo.f_flags & UInt32(MNT_LOCAL) != 0
    let isRemovable = fileSystemInfo.f_flags & UInt32(MNT_REMOVABLE) != 0
    guard isLocal && !isRemovable else {
        return false
    }

    // The file must not be compressed in a format that requires another process to be running in order to provide the contents (i.e. type 5)
    // An unfortunate path-based operation in the midst of all this fd-specific work.
    // Checks with AppleFSCompression
    return path.withFileSystemRepresentation { pathFileSystemRep in
        // Without a path the compression check cannot run, so do not claim the file is safe.
        guard let pathFileSystemRep else { return false }
        return _NSFileCompressionTypeIsSafeForMapping(pathFileSystemRep)
    }
#else
    // For other platforms or configurations, any appropriate checks would go here. For now, we assume it is unsafe.
    return false
#endif
}
// MARK: - Reading
#if FOUNDATION_FRAMEWORK
extension NSData {
    /// Objective-C entry point to Swift `Data` reading.
    ///
    /// On return, `bytes` and `length` describe the buffer that was read, and `didMap` tells the caller whether that buffer must be released with `unmap` (`true`) or with `free` (`false`).
    /// A `maxLength` of `Int.max` means "no limit".
    @objc(_readBytesFromPath:maxLength:bytes:length:didMap:options:reportProgress:error:)
    internal static func _readBytes(fromPath path: String, maxLength: Int, bytes: UnsafeMutablePointer<UnsafeMutableRawPointer?>, length: UnsafeMutablePointer<Int>, didMap: UnsafeMutablePointer<ObjCBool>, options: Data.ReadingOptions, reportProgress: Bool) throws {
        // No attributes are requested here; the dictionary only satisfies the inout parameter.
        var ignoredAttributes: [String : Data] = [:]
        let lengthLimit: Int? = (maxLength == Int.max) ? nil : maxLength
        let readResult = try readBytesFromFile(path: .path(path), reportProgress: reportProgress, maxLength: lengthLimit, options: options, attributesToRead: [], attributes: &ignoredAttributes)

        bytes.pointee = readResult.bytes
        length.pointee = readResult.length
        if case .some(.unmap) = readResult.deallocator {
            didMap.pointee = ObjCBool(true)
        } else {
            didMap.pointee = ObjCBool(false)
        }
    }

    /// Objective-C entry point to Swift `Data` reading that also reports the string encoding stored in the file's extended attributes.
    ///
    /// Behaves like `_readBytes(fromPath:...)`, and additionally writes the raw value of the stored encoding to `outEncoding`, or `kCFStringEncodingInvalidId` when no usable encoding attribute was found.
    @objc(_readBytesAndEncodingFromPath:maxLength:encoding:bytes:length:didMap:options:reportProgress:error:)
    internal static func _readBytesAndEncoding(fromPath path: String, maxLength: Int, encoding outEncoding: UnsafeMutablePointer<UInt>, bytes: UnsafeMutablePointer<UnsafeMutableRawPointer?>, length: UnsafeMutablePointer<Int>, didMap: UnsafeMutablePointer<ObjCBool>, options: Data.ReadingOptions, reportProgress: Bool) throws {
        var fetchedAttributes: [String : Data] = [:]
        let lengthLimit: Int? = (maxLength == Int.max) ? nil : maxLength
        let readResult = try readBytesFromFile(path: .path(path), reportProgress: reportProgress, maxLength: lengthLimit, options: options, attributesToRead: [NSFileAttributeStringEncoding], attributes: &fetchedAttributes)

        if let encodingAttributeData = fetchedAttributes[NSFileAttributeStringEncoding], let encoding = encodingFromDataForExtendedAttribute(encodingAttributeData) {
            outEncoding.pointee = encoding.rawValue
        } else {
            outEncoding.pointee = UInt(kCFStringEncodingInvalidId)
        }

        bytes.pointee = readResult.bytes
        length.pointee = readResult.length
        if case .some(.unmap) = readResult.deallocator {
            didMap.pointee = ObjCBool(true)
        } else {
            didMap.pointee = ObjCBool(false)
        }
    }
}
#endif
/// Reads the file at `inPath` into a `Data` without fetching any extended attributes.
///
/// Convenience wrapper around the attribute-reading overload, called with an empty attribute list.
internal func readDataFromFile(path inPath: PathOrURL, reportProgress: Bool, maxLength: Int? = nil, options: Data.ReadingOptions = []) throws -> Data {
    // Nothing is requested, so this dictionary only satisfies the inout parameter.
    var unusedAttributes: [String : Data] = [:]
    let contents = try readDataFromFile(path: inPath, reportProgress: reportProgress, maxLength: maxLength, options: options, attributesToRead: [], attributes: &unusedAttributes)
    return contents
}
/// Reads the file at `inPath` into a `Data`, optionally fetching extended attributes.
///
/// - Parameters:
///   - inPath: The file to read.
///   - reportProgress: Whether progress should be reported while reading.
///   - maxLength: Upper bound on the number of bytes to read, or `nil` for no limit.
///   - options: Reading options (mapping, caching).
///   - attributesToRead: Names of extended attributes to fetch.
///   - attributes: Receives the fetched extended attributes.
/// - Returns: The file contents; an empty `Data` when no bytes were read.
/// - Throws: Whatever `readBytesFromFile` throws.
internal func readDataFromFile(path inPath: PathOrURL, reportProgress: Bool, maxLength: Int? = nil, options: Data.ReadingOptions = [], attributesToRead: [String], attributes: inout [String: Data]) throws -> Data {
    let result = try readBytesFromFile(path: inPath, reportProgress: reportProgress, maxLength: maxLength, options: options, attributesToRead: attributesToRead, attributes: &attributes)

    guard let bytes = result.bytes else {
        // No buffer was produced (empty file).
        return Data()
    }

    guard result.length > 0 else {
        // A buffer was allocated but the read produced no bytes (for example, the file shrank after its size was queried).
        // Release the buffer here, since the empty Data returned below does not take ownership of it.
        switch result.deallocator {
        case .some(.free):
            free(bytes)
        case .some(.custom(let release)):
            release(bytes, 0)
        default:
            // Mapped regions are only produced with a non-zero length, and the remaining cases need no cleanup.
            break
        }
        return Data()
    }

    // Ownership of the buffer moves to the Data. A missing deallocator means no cleanup is needed.
    return Data(bytesNoCopy: bytes, count: result.length, deallocator: result.deallocator ?? Data.Deallocator.none)
}
/// The raw outcome of reading a file: a buffer, its length, and how the buffer must be released.
/// `readBytesFromFile` returns `bytes == nil`, `length == 0` and `deallocator == nil` for an empty file.
struct ReadBytesResult {
/// Pointer to the read bytes, or `nil` when nothing was read.
var bytes: UnsafeMutableRawPointer?
/// Number of bytes.
/// Matches `Data`'s count type.
var length: Int
/// The deallocator to use for these bytes, or nil if no deallocator is needed.
/// Callers in this file compare it against `.unmap` to tell mapped buffers from heap buffers.
var deallocator: Data.Deallocator?
}
#if os(Windows)
/// Reads up to `dwLength` bytes from `hFile` into `pBuffer`, one chunk at a time.
///
/// - Parameters:
///   - hFile: The file handle to read from.
///   - path: The path of the file; only used to build errors.
///   - pBuffer: Destination buffer. The caller passes a buffer of at least `dwLength` bytes.
///   - dwLength: The number of bytes to read.
///   - dwChunk: The maximum number of bytes requested per `ReadFile` call.
///   - bProgress: Whether to report progress (only done when there is a current `Progress`).
/// - Returns: The number of bytes actually read, which is smaller than `dwLength` when a read comes up short.
/// - Throws: `CocoaError(.userCancelled)` when the progress is cancelled, or a file error when `ReadFile` fails.
private func read(from hFile: HANDLE, at path: PathOrURL,
                  into pBuffer: UnsafeMutableRawPointer, length dwLength: Int,
                  chunkSize dwChunk: Int = 4096, progress bProgress: Bool)
        throws -> Int {
    var pBuffer = pBuffer
    let progress = bProgress && Progress.current() != nil ? Progress(totalUnitCount: Int64(dwLength)) : nil

    // Track the remaining byte count as Int: the length may exceed what a DWORD can hold.
    var cbRemaining = dwLength
    // A non-positive chunk size would never make progress.
    let cbChunk = max(dwChunk, 1)

    while cbRemaining > 0 {
        if let progress, progress.isCancelled {
            throw CocoaError(.userCancelled)
        }

        // ReadFile takes a DWORD count, so clamp each request.
        let dwBytesToRead = DWORD(clamping: min(cbChunk, cbRemaining))
        var dwBytesRead: DWORD = 0
        if !ReadFile(hFile, pBuffer, dwBytesToRead, &dwBytesRead, nil) {
            throw CocoaError.errorWithFilePath(path, win32: GetLastError(), reading: true)
        }

        cbRemaining -= Int(dwBytesRead)
        progress?.completedUnitCount = Int64(dwLength - cbRemaining)

        // A short read ends the loop.
        if dwBytesRead < dwBytesToRead {
            break
        }
        pBuffer = pBuffer.advanced(by: Int(dwBytesRead))
    }
    return dwLength - cbRemaining
}
#endif
/// Reads the contents of the regular file at `inPath` into a raw buffer, either by memory mapping it or by reading it into a `malloc`ed buffer.
///
/// - Parameters:
///   - inPath: The file to read. Must not be empty.
///   - reportProgress: Whether to report progress (only done when there is a current `Progress`).
///   - maxLength: Upper bound on the number of bytes returned, or `nil` for no limit.
///   - options: Reading options; they control mapping and (in the framework build) caching.
///   - attributesToRead: Names of extended attributes to fetch after reading. Not used on Windows.
///   - attributes: Replaced with the fetched extended attributes when `attributesToRead` is not empty.
/// - Returns: The buffer, its length, and the deallocator that must be used to release it. An empty file yields a `nil` buffer with length 0.
/// - Throws: `CocoaError(.fileReadInvalidFileName)` for an empty or unrepresentable path, `CocoaError(.userCancelled)` on cancellation, or a file error describing the failed system call or an unsupported file (directory, non-regular file, file too large).
internal func readBytesFromFile(path inPath: PathOrURL, reportProgress: Bool, maxLength: Int?, options: Data.ReadingOptions, attributesToRead: [String], attributes: inout [String: Data]) throws -> ReadBytesResult {
    if inPath.isEmpty {
        // For compatibility, throw a different error than the perhaps-expected 'file not found' here (41646641)
        throw CocoaError(.fileReadInvalidFileName)
    }

#if os(Windows)
    let hFile: HANDLE = try inPath.path.withNTPathRepresentation {
        CreateFileW($0, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, nil, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nil)
    }
    if hFile == INVALID_HANDLE_VALUE {
        throw CocoaError.errorWithFilePath(inPath, win32: GetLastError(), reading: true)
    }
    defer {
        CloseHandle(hFile)
    }

    var fiFileInfo: BY_HANDLE_FILE_INFORMATION = BY_HANDLE_FILE_INFORMATION()
    guard GetFileInformationByHandle(hFile, &fiFileInfo) else {
        throw CocoaError.errorWithFilePath(inPath, win32: GetLastError(), reading: true)
    }

    // Only regular on-disk files can be read.
    let ftFileType: DWORD = GetFileType(hFile)
    let bIsDirectory = fiFileInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY == FILE_ATTRIBUTE_DIRECTORY
    guard ftFileType == FILE_TYPE_DISK, !bIsDirectory else {
        throw CocoaError.errorWithFilePath(inPath, errno: bIsDirectory ? EISDIR : EACCES, reading: true)
    }

    let szFileSize: UInt64 =
        UInt64(fiFileInfo.nFileSizeHigh) << 32 | UInt64(fiFileInfo.nFileSizeLow << 0)
    if szFileSize > Int.max {
        // We cannot hold this in `Data`, which uses Int as its count.
        throw CocoaError.errorWithFilePath(inPath, errno: EFBIG, reading: true)
    }

    // Honor `maxLength` for mapped and unmapped reads alike, matching the non-Windows path below.
    let cbLength = min(Int(szFileSize), maxLength ?? Int.max)
    if cbLength < 0 {
        throw CocoaError.errorWithFilePath(inPath, errno: ENOMEM, reading: true)
    }

    let localProgress = (reportProgress && Progress.current() != nil) ? Progress(totalUnitCount: Int64(cbLength)) : nil
    guard cbLength > 0 else {
        localProgress?.totalUnitCount = 1
        localProgress?.completedUnitCount = 1
        return ReadBytesResult(bytes: nil, length: 0, deallocator: nil)
    }

    if options.contains(.alwaysMapped) {
        // CreateFileMappingW reports failure by returning NULL.
        guard let hMapping = CreateFileMappingW(hFile, nil, PAGE_READONLY, 0, 0, nil) else {
            throw CocoaError.errorWithFilePath(inPath, win32: GetLastError(), reading: true)
        }
        // A mapped view keeps the file mapping object alive on its own, so the mapping handle can be closed as soon as the view exists (or failed to be created). Only the view must outlive this function.
        defer {
            CloseHandle(hMapping)
        }

        guard let pData = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, SIZE_T(cbLength)) else {
            throw CocoaError.errorWithFilePath(inPath, win32: GetLastError(), reading: true)
        }

        return ReadBytesResult(bytes: pData, length: cbLength, deallocator: .custom({ pView, _ in
            // The deallocator receives the view's base address, which is what UnmapViewOfFile expects.
            guard UnmapViewOfFile(pView) else {
                fatalError("UnmapViewOfFile")
            }
        }))
    } else {
        guard let pBuffer: UnsafeMutableRawPointer = malloc(cbLength) else {
            throw CocoaError.errorWithFilePath(inPath, errno: ENOMEM, reading: true)
        }

        localProgress?.becomeCurrent(withPendingUnitCount: Int64(cbLength))
        do {
            let dwLength = try read(from: hFile, at: inPath, into: pBuffer, length: cbLength, progress: reportProgress)
            localProgress?.resignCurrent()
            return ReadBytesResult(bytes: pBuffer, length: dwLength, deallocator: .free)
        } catch {
            localProgress?.resignCurrent()
            free(pBuffer)
            throw error
        }
    }
#else
    let fd = try inPath.withFileSystemRepresentation { inPathFileSystemRep in
        guard let inPathFileSystemRep else {
            throw CocoaError(.fileReadInvalidFileName)
        }
        return open(inPathFileSystemRep, O_RDONLY, 0o666)
    }

    guard fd >= 0 else {
        throw CocoaError.errorWithFilePath(inPath, errno: errno, reading: true)
    }

    defer {
        close(fd)
    }

#if FOUNDATION_FRAMEWORK
    if options.contains(.uncached) {
        // Non-zero arg turns off caching; we ignore error as uncached is just a hint
        _ = fcntl(fd, F_NOCACHE, 1)
    }
#endif

    var filestat: stat = stat()
    let err = fstat(fd, &filestat)

    guard err == 0 else {
        throw CocoaError.errorWithFilePath(inPath, errno: errno, reading: true)
    }

    // The following check is valid for 64-bit platforms.
    if filestat.st_size > Int.max {
        // We cannot hold this in `Data`, which uses Int as its count.
        throw CocoaError.errorWithFilePath(inPath, errno: EFBIG, reading: true)
    }

    // The number of bytes to return: the file size, capped by `maxLength` when one was given.
    let fileSize = min(Int(clamping: filestat.st_size), maxLength ?? Int.max)
    let fileType = filestat.st_mode & S_IFMT
#if !NO_FILESYSTEM
    let shouldMap = shouldMapFileDescriptor(fd, path: inPath, options: options)
#else
    let shouldMap = false
#endif

    if fileType != S_IFREG {
        // EACCES is still an odd choice, but at least we have a better error for directories.
        let code = (fileType == S_IFDIR) ? EISDIR : EACCES
        throw CocoaError.errorWithFilePath(inPath, errno: code, reading: true)
    }

    if fileSize < 0 {
        throw CocoaError.errorWithFilePath(inPath, errno: ENOMEM, reading: true)
    }

#if _pointerBitWidth(_32)
    // Refuse to do more than 2 GB on 32-bit platforms
    if fileSize > SSIZE_MAX {
        throw CocoaError.errorWithFilePath(inPath, errno: EFBIG, reading: true)
    }
#endif

    let result: ReadBytesResult
    let localProgress = (reportProgress && Progress.current() != nil) ? Progress(totalUnitCount: Int64(fileSize)) : nil

    if fileSize == 0 {
        // Nothing to read; report the work as complete.
        localProgress?.totalUnitCount = 1
        localProgress?.completedUnitCount = 1
        result = ReadBytesResult(bytes: nil, length: 0, deallocator: nil)
    } else if shouldMap {
#if !NO_FILESYSTEM
        guard let bytes = mmap(nil, Int(fileSize), PROT_READ, MAP_PRIVATE, fd, 0) else {
            throw CocoaError.errorWithFilePath(inPath, errno: errno, reading: true)
        }

        guard bytes != MAP_FAILED else {
            throw CocoaError.errorWithFilePath(inPath, errno: errno, reading: true)
        }

        // Using bytes as the unit in this case doesn't really make any sense, since the amount of work required for mmap isn't meaningfully proportional to the size being mapped.
        localProgress?.totalUnitCount = 1
        localProgress?.completedUnitCount = 1

        result = ReadBytesResult(bytes: bytes, length: Int(fileSize), deallocator: .unmap)
#else
        // This was disabled above
        fatalError("mapping should not be enabled")
#endif
    } else {
        // We've verified above that fileSize will fit in `Int`
        guard let bytes = malloc(Int(fileSize)) else {
            throw CocoaError.errorWithFilePath(inPath, errno: ENOMEM, reading: true)
        }

        localProgress?.becomeCurrent(withPendingUnitCount: Int64(fileSize))
        do {
            let length = try readBytesFromFileDescriptor(fd, path: inPath, buffer: bytes, length: fileSize, reportProgress: reportProgress)
            localProgress?.resignCurrent()

            result = ReadBytesResult(bytes: bytes, length: length, deallocator: .free)
        } catch {
            // The buffer never reached the caller, so release it here.
            localProgress?.resignCurrent()
            free(bytes)
            throw error
        }
    }

    if !attributesToRead.isEmpty {
        attributes = readExtendedAttributesFromFileDescriptor(fd, attrsToRead: attributesToRead)
    }

    return result
#endif
}
/// Fills `inBuffer` with up to `length` bytes read from `fd` and returns the number of bytes actually read.
///
/// Sizes are expressed as `Int` so that they line up with `Data`'s count type.
/// With `readUntilLength` set to `true` (the default), reads are repeated until `length` bytes have arrived or a read returns zero bytes. With `false`, the first read that delivers fewer bytes than were requested ends the operation; this suits sources such as sockets, where `length` is just the most that can be read at once.
/// - Throws: `CocoaError(.userCancelled)` when the progress is cancelled, or a file error built from `errno` when a read fails.
private func readBytesFromFileDescriptor(_ fd: Int32, path: PathOrURL, buffer inBuffer: UnsafeMutableRawPointer, length: Int, readUntilLength: Bool = true, reportProgress: Bool) throws -> Int {
    let progress: Progress?
    let chunkSize: size_t
    if Progress.current() != nil && reportProgress {
        progress = Progress(totalUnitCount: Int64(length))
        // Progress can only be reported between reads, so split the work into roughly 1% pieces (but never less than 4 KiB each).
        chunkSize = max(length / 100, 1024 * 4)
    } else {
        progress = nil
        // Nobody is watching: ask for everything in a single read, if possible.
        chunkSize = length
    }

    var cursor = inBuffer
    var bytesLeft = length

    while bytesLeft > 0 {
        if let progress, progress.isCancelled {
            throw CocoaError(.userCancelled)
        }

        // Request at most Int32.max bytes at a time (some platforms return an error over that amount), and never more than what is still outstanding.
        let requestSize = CUnsignedInt(clamping: min(chunkSize, bytesLeft, Int(CInt.max)))

        var readCount: CInt
        repeat {
            if let progress, progress.isCancelled {
                throw CocoaError(.userCancelled)
            }
            // read takes an Int-sized argument, which will always be at least the size of Int32.
#if os(Windows)
            readCount = _read(fd, cursor, requestSize)
#else
            readCount = CInt(read(fd, cursor, Int(requestSize)))
#endif
            // Retry reads that were merely interrupted.
        } while readCount < 0 && errno == EINTR

        guard readCount >= 0 else {
            // The read failed
            let errNum = errno
            logFileIOErrno(errNum, at: "read")
            throw CocoaError.errorWithFilePath(path, errno: errNum, reading: true)
        }

        guard readCount > 0 else {
            // Zero bytes may mean an unexpected end of file. Stop and report what has been read so far, just as a single read() call would.
            break
        }

        // Never let the outstanding count go negative; treat that as having reached the end.
        bytesLeft = max(bytesLeft - Int(clamping: readCount), 0)
        progress?.completedUnitCount = Int64(length - bytesLeft)

        // Unless asked to keep going until `length` bytes are in, a short read ends the operation.
        if !readUntilLength && readCount < requestSize {
            break
        }

        cursor = cursor.advanced(by: numericCast(readCount))
    }

    return length - bytesLeft
}