/*
 * SystemMonitor.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "flow/flow.h"
#include "flow/Histogram.h"
#include "flow/Platform.h"
#include "flow/TDMetric.actor.h"
#include "flow/SystemMonitor.h"

#if defined(ALLOC_INSTRUMENTATION) && defined(__linux__)
#include <cxxabi.h>
#endif

SystemMonitorMachineState machineState;

void initializeSystemMonitorMachineState(SystemMonitorMachineState machineState) {
	::machineState = machineState;

	ASSERT(g_network);
	::machineState.monitorStartTime = now();
}

double machineStartTime() {
	return ::machineState.monitorStartTime;
}

void systemMonitor() {
	static StatisticsState statState = StatisticsState();
#if !DEBUG_DETERMINISM
	customSystemMonitor("ProcessMetrics", &statState, true);
#endif
}

SystemStatistics getSystemStatistics() {
	static StatisticsState statState = StatisticsState();
	const IPAddress ipAddr = machineState.ip.present() ? machineState.ip.get() : IPAddress();
	return getSystemStatistics(
	    machineState.folder.present() ? machineState.folder.get() : "", &ipAddr, &statState.systemState, false);
}

#define TRACEALLOCATOR(size)                                                                                           \
	TraceEvent("MemSample")                                                                                            \
	    .detail("Count", FastAllocator<size>::getApproximateMemoryUnused() / size)                                     \
	    .detail("TotalSize", FastAllocator<size>::getApproximateMemoryUnused())                                        \
	    .detail("SampleCount", 1)                                                                                      \
	    .detail("Hash", "FastAllocatedUnused" #size)                                                                   \
	    .detail("Bt", "na")
#define DETAILALLOCATORMEMUSAGE(size)                                                                                  \
	detail("TotalMemory" #size, FastAllocator<size>::getTotalMemory())                                                 \
	    .detail("ApproximateUnusedMemory" #size, FastAllocator<size>::getApproximateMemoryUnused())                    \
	    .detail("ActiveThreads" #size, FastAllocator<size>::getActiveThreads())

SystemStatistics customSystemMonitor(std::string const& eventName, StatisticsState* statState, bool machineMetrics) {
	const IPAddress ipAddr = machineState.ip.present() ? machineState.ip.get() : IPAddress();
	SystemStatistics currentStats = getSystemStatistics(
	    machineState.folder.present() ? machineState.folder.get() : "", &ipAddr, &statState->systemState, true);
	NetworkData netData;
	netData.init();
	if (!g_network->isSimulated() && currentStats.initialized) {
		{
			TraceEvent(eventName.c_str())
			    .detail("Elapsed", currentStats.elapsed)
			    .detail("CPUSeconds", currentStats.processCPUSeconds)
			    .detail("MainThreadCPUSeconds", currentStats.mainThreadCPUSeconds)
			    .detail("UptimeSeconds", now() - machineState.monitorStartTime)
			    .detail("Memory", currentStats.processMemory)
			    .detail("ResidentMemory", currentStats.processResidentMemory)
			    .detail("UnusedAllocatedMemory", getTotalUnusedAllocatedMemory())
			    .detail("MbpsSent",
			            ((netData.bytesSent - statState->networkState.bytesSent) * 8e-6) / currentStats.elapsed)
			    .detail("MbpsReceived",
			            ((netData.bytesReceived - statState->networkState.bytesReceived) * 8e-6) / currentStats.elapsed)
			    .detail("DiskTotalBytes", currentStats.processDiskTotalBytes)
			    .detail("DiskFreeBytes", currentStats.processDiskFreeBytes)
			    .detail("DiskQueueDepth", currentStats.processDiskQueueDepth)
			    .detail("DiskIdleSeconds", currentStats.processDiskIdleSeconds)
			    .detail("DiskReads", currentStats.processDiskRead)
			    .detail("DiskReadSeconds", currentStats.processDiskReadSeconds)
			    .detail("DiskWrites", currentStats.processDiskWrite)
			    .detail("DiskWriteSeconds", currentStats.processDiskWriteSeconds)
			    .detail("DiskReadsCount", currentStats.processDiskReadCount)
			    .detail("DiskWritesCount", currentStats.processDiskWriteCount)
			    .detail("DiskWriteSectors", currentStats.processDiskWriteSectors)
			    .detail("DiskReadSectors", currentStats.processDiskReadSectors)
			    .detail("FileWrites", netData.countFileLogicalWrites - statState->networkState.countFileLogicalWrites)
			    .detail("FileReads", netData.countFileLogicalReads - statState->networkState.countFileLogicalReads)
			    .detail("CacheReadBytes",
			            netData.countFileCacheReadBytes - statState->networkState.countFileCacheReadBytes)
			    .detail("CacheFinds", netData.countFileCacheFinds - statState->networkState.countFileCacheFinds)
			    .detail("CacheWritesBlocked",
			            netData.countFileCacheWritesBlocked - statState->networkState.countFileCacheWritesBlocked)
			    .detail("CacheReadsBlocked",
			            netData.countFileCacheReadsBlocked - statState->networkState.countFileCacheReadsBlocked)
			    .detail("CachePageReadsMerged",
			            netData.countFileCachePageReadsMerged - statState->networkState.countFileCachePageReadsMerged)
			    .detail("CacheWrites", netData.countFileCacheWrites - statState->networkState.countFileCacheWrites)
			    .detail("CacheReads", netData.countFileCacheReads - statState->networkState.countFileCacheReads)
			    .detail("CacheHits", netData.countFilePageCacheHits - statState->networkState.countFilePageCacheHits)
			    .detail("CacheMisses",
			            netData.countFilePageCacheMisses - statState->networkState.countFilePageCacheMisses)
			    .detail("CacheEvictions",
			            netData.countFilePageCacheEvictions - statState->networkState.countFilePageCacheEvictions)
			    .detail("DCID", machineState.dcId)
			    .detail("ZoneID", machineState.zoneId)
			    .detail("MachineID", machineState.machineId)
			    .detail("AIOSubmitCount", netData.countAIOSubmit - statState->networkState.countAIOSubmit)
			    .detail("AIOCollectCount", netData.countAIOCollect - statState->networkState.countAIOCollect)
			    .detail("AIOSubmitLag",
			            (g_network->networkInfo.metrics.secSquaredSubmit -
			             statState->networkMetricsState.secSquaredSubmit) /
			                currentStats.elapsed)
			    .detail("AIODiskStall",
			            (g_network->networkInfo.metrics.secSquaredDiskStall -
			             statState->networkMetricsState.secSquaredDiskStall) /
			                currentStats.elapsed)
			    .detail("CurrentConnections",
			            netData.countConnEstablished - netData.countConnClosedWithError -
			                netData.countConnClosedWithoutError)
			    .detail("ConnectionsEstablished",
			            (double)(netData.countConnEstablished - statState->networkState.countConnEstablished) /
			                currentStats.elapsed)
			    .detail("ConnectionsClosed",
			            ((netData.countConnClosedWithError - statState->networkState.countConnClosedWithError) +
			             (netData.countConnClosedWithoutError - statState->networkState.countConnClosedWithoutError)) /
			                currentStats.elapsed)
			    .detail("ConnectionErrors",
			            (netData.countConnClosedWithError - statState->networkState.countConnClosedWithError) /
			                currentStats.elapsed)
			    .detail("TLSPolicyFailures",
			            (netData.countTLSPolicyFailures - statState->networkState.countTLSPolicyFailures) /
			                currentStats.elapsed)
			    .trackLatest(eventName);

			TraceEvent("MemoryMetrics")
			    .DETAILALLOCATORMEMUSAGE(16)
			    .DETAILALLOCATORMEMUSAGE(32)
			    .DETAILALLOCATORMEMUSAGE(64)
			    .DETAILALLOCATORMEMUSAGE(96)
			    .DETAILALLOCATORMEMUSAGE(128)
			    .DETAILALLOCATORMEMUSAGE(256)
			    .DETAILALLOCATORMEMUSAGE(512)
			    .DETAILALLOCATORMEMUSAGE(1024)
			    .DETAILALLOCATORMEMUSAGE(2048)
			    .DETAILALLOCATORMEMUSAGE(4096)
			    .DETAILALLOCATORMEMUSAGE(8192)
			    .DETAILALLOCATORMEMUSAGE(16384)
			    .detail("HugeArenaMemory", g_hugeArenaMemory.load())
			    .detail("DCID", machineState.dcId)
			    .detail("ZoneID", machineState.zoneId)
			    .detail("MachineID", machineState.machineId);

			uint64_t total_memory = 0;
			total_memory += FastAllocator<16>::getTotalMemory();
			total_memory += FastAllocator<32>::getTotalMemory();
			total_memory += FastAllocator<64>::getTotalMemory();
			total_memory += FastAllocator<96>::getTotalMemory();
			total_memory += FastAllocator<128>::getTotalMemory();
			total_memory += FastAllocator<256>::getTotalMemory();
			total_memory += FastAllocator<512>::getTotalMemory();
			total_memory += FastAllocator<1024>::getTotalMemory();
			total_memory += FastAllocator<2048>::getTotalMemory();
			total_memory += FastAllocator<4096>::getTotalMemory();
			total_memory += FastAllocator<8192>::getTotalMemory();
			total_memory += FastAllocator<16384>::getTotalMemory();

			uint64_t unused_memory = 0;
			unused_memory += FastAllocator<16>::getApproximateMemoryUnused();
			unused_memory += FastAllocator<32>::getApproximateMemoryUnused();
			unused_memory += FastAllocator<64>::getApproximateMemoryUnused();
			unused_memory += FastAllocator<96>::getApproximateMemoryUnused();
			unused_memory += FastAllocator<128>::getApproximateMemoryUnused();
			unused_memory += FastAllocator<256>::getApproximateMemoryUnused();
			unused_memory += FastAllocator<512>::getApproximateMemoryUnused();
			unused_memory += FastAllocator<1024>::getApproximateMemoryUnused();
			unused_memory += FastAllocator<2048>::getApproximateMemoryUnused();
			unused_memory += FastAllocator<4096>::getApproximateMemoryUnused();
			unused_memory += FastAllocator<8192>::getApproximateMemoryUnused();
			unused_memory += FastAllocator<16384>::getApproximateMemoryUnused();

			if (total_memory > 0) {
				TraceEvent("FastAllocMemoryUsage")
				    .detail("TotalMemory", total_memory)
				    .detail("UnusedMemory", unused_memory)
				    .detail("Utilization", format("%f%%", (total_memory - unused_memory) * 100.0 / total_memory));
			}

			TraceEvent n("NetworkMetrics");
			n.detail("Elapsed", currentStats.elapsed)
			    .detail("CantSleep", netData.countCantSleep - statState->networkState.countCantSleep)
			    .detail("WontSleep", netData.countWontSleep - statState->networkState.countWontSleep)
			    .detail("Yields", netData.countYields - statState->networkState.countYields)
			    .detail("YieldCalls", netData.countYieldCalls - statState->networkState.countYieldCalls)
			    .detail("YieldCallsTrue", netData.countYieldCallsTrue - statState->networkState.countYieldCallsTrue)
			    .detail("RunLoopProfilingSignals",
			            netData.countRunLoopProfilingSignals - statState->networkState.countRunLoopProfilingSignals)
			    .detail("YieldBigStack", netData.countYieldBigStack - statState->networkState.countYieldBigStack)
			    .detail("RunLoopIterations", netData.countRunLoop - statState->networkState.countRunLoop)
			    .detail("TimersExecuted", netData.countTimers - statState->networkState.countTimers)
			    .detail("TasksExecuted", netData.countTasks - statState->networkState.countTasks)
			    .detail("ASIOEventsProcessed", netData.countASIOEvents - statState->networkState.countASIOEvents)
			    .detail("ReadCalls", netData.countReads - statState->networkState.countReads)
			    .detail("WriteCalls", netData.countWrites - statState->networkState.countWrites)
			    .detail("ReadProbes", netData.countReadProbes - statState->networkState.countReadProbes)
			    .detail("WriteProbes", netData.countWriteProbes - statState->networkState.countWriteProbes)
			    .detail("PacketsRead", netData.countPacketsReceived - statState->networkState.countPacketsReceived)
			    .detail("PacketsGenerated",
			            netData.countPacketsGenerated - statState->networkState.countPacketsGenerated)
			    .detail("WouldBlock", netData.countWouldBlock - statState->networkState.countWouldBlock)
			    .detail("LaunchTime", netData.countLaunchTime - statState->networkState.countLaunchTime)
			    .detail("ReactTime", netData.countReactTime - statState->networkState.countReactTime)
			    .detail("DCID", machineState.dcId)
			    .detail("ZoneID", machineState.zoneId)
			    .detail("MachineID", machineState.machineId);

			for (int i = 0; i < NetworkMetrics::SLOW_EVENT_BINS; i++) {
				if (int c = g_network->networkInfo.metrics.countSlowEvents[i] -
				            statState->networkMetricsState.countSlowEvents[i]) {
					n.detail(format("SlowTask%dM", 1 << i).c_str(), c);
				}
			}

			std::map<TaskPriority, double> loggedDurations;
			for (auto& itr : g_network->networkInfo.metrics.activeTrackers) {
				if (itr.second.active) {
					itr.second.duration += now() - itr.second.windowedTimer;
					itr.second.windowedTimer = now();
				}

				if (itr.second.duration / currentStats.elapsed >= FLOW_KNOBS->MIN_LOGGED_PRIORITY_BUSY_FRACTION) {
					loggedDurations[itr.first] = std::min(currentStats.elapsed, itr.second.duration);
				}

				itr.second.duration = 0;
			}

			for (auto const& itr : loggedDurations) {
				// PriorityBusyX measures the amount of time spent busy at exactly priority X
				n.detail(format("PriorityBusy%d", itr.first).c_str(), itr.second);
			}

			bool firstTracker = true;
			for (auto& itr : g_network->networkInfo.metrics.starvationTrackers) {
				if (itr.active) {
					itr.duration += now() - itr.windowedTimer;
					itr.maxDuration = std::max(itr.maxDuration, now() - itr.timer);
					itr.windowedTimer = now();
				}

				// PriorityStarvedBelowX: how much of the elapsed time we were running tasks at a priority at or above X
				// PriorityMaxStarvedBelowX: The longest single span of time that you were starved below that priority,
				// which could tell you if you are doing work in bursts.
				n.detail(format("PriorityStarvedBelow%d", itr.priority).c_str(),
				         std::min(currentStats.elapsed, itr.duration));
				n.detail(format("PriorityMaxStarvedBelow%d", itr.priority).c_str(), itr.maxDuration);

				if (firstTracker) {
					g_network->networkInfo.metrics.lastRunLoopBusyness =
					    std::min(currentStats.elapsed, itr.duration) / currentStats.elapsed;
					firstTracker = false;
				}

				itr.duration = 0;
				itr.maxDuration = 0;
			}

			n.trackLatest("NetworkMetrics");
		}

		if (machineMetrics) {
			TraceEvent("MachineMetrics")
			    .detail("Elapsed", currentStats.elapsed)
			    .detail("MbpsSent", currentStats.machineMegabitsSent / currentStats.elapsed)
			    .detail("MbpsReceived", currentStats.machineMegabitsReceived / currentStats.elapsed)
			    .detail("OutSegs", currentStats.machineOutSegs)
			    .detail("RetransSegs", currentStats.machineRetransSegs)
			    .detail("CPUSeconds", currentStats.machineCPUSeconds)
			    .detail("TotalMemory", currentStats.machineTotalRAM)
			    .detail("CommittedMemory", currentStats.machineCommittedRAM)
			    .detail("AvailableMemory", currentStats.machineAvailableRAM)
			    .detail("DCID", machineState.dcId)
			    .detail("ZoneID", machineState.zoneId)
			    .detail("MachineID", machineState.machineId)
			    .trackLatest("MachineMetrics");
		}
	}

#ifdef ALLOC_INSTRUMENTATION
	{
		static double firstTime = 0.0;
		if (firstTime == 0.0)
			firstTime = now();
		if (now() - firstTime > 10 || g_network->isSimulated()) {
			firstTime = now();
			std::vector<std::pair<std::string, const char*>> typeNames;
			for (auto i = allocInstr.begin(); i != allocInstr.end(); ++i) {
				std::string s;
#ifdef __linux__
				char* demangled = abi::__cxa_demangle(i->first, nullptr, nullptr, nullptr);
				if (demangled) {
					s = demangled;
					if (StringRef(s).startsWith(LiteralStringRef("(anonymous namespace)::")))
						s = s.substr(LiteralStringRef("(anonymous namespace)::").size());
					free(demangled);
				} else
					s = i->first;
#else
				s = i->first;
				if (StringRef(s).startsWith(LiteralStringRef("class `anonymous namespace'::")))
					s = s.substr(LiteralStringRef("class `anonymous namespace'::").size());
				else if (StringRef(s).startsWith(LiteralStringRef("class ")))
					s = s.substr(LiteralStringRef("class ").size());
				else if (StringRef(s).startsWith(LiteralStringRef("struct ")))
					s = s.substr(LiteralStringRef("struct ").size());
#endif
				typeNames.emplace_back(s, i->first);
			}
			std::sort(typeNames.begin(), typeNames.end());
			for (int i = 0; i < typeNames.size(); i++) {
				const char* n = typeNames[i].second;
				auto& f = allocInstr[n];
				if (f.maxAllocated > 10000)
					TraceEvent("AllocInstrument")
					    .detail("CurrentAlloc", f.allocCount - f.deallocCount)
					    .detail("Name", typeNames[i].first.c_str());
			}

			std::unordered_map<uint32_t, BackTraceAccount> traceCounts;
			size_t memSampleSize;
			memSample_entered = true;
			{
				ThreadSpinLockHolder holder(memLock);
				traceCounts = backTraceLookup;
				memSampleSize = memSample.size();
			}
			memSample_entered = false;

			uint64_t totalSize = 0;
			uint64_t totalCount = 0;
			for (auto i = traceCounts.begin(); i != traceCounts.end(); ++i) {
				char buf[1024];
				std::vector<void*>* frames = i->second.backTrace;
				std::string backTraceStr;
#if defined(_WIN32)
				for (int j = 1; j < frames->size(); j++) {
					_snprintf(buf, 1024, "%p ", frames->at(j));
					backTraceStr += buf;
				}
#else
				backTraceStr = platform::format_backtrace(&(*frames)[0], frames->size());
#endif

				TraceEvent("MemSample")
				    .detail("Count", (int64_t)i->second.count)
				    .detail("TotalSize", i->second.totalSize)
				    .detail("SampleCount", i->second.sampleCount)
				    .detail("Hash", format("%lld", i->first))
				    .detail("Bt", backTraceStr);

				totalSize += i->second.totalSize;
				totalCount += i->second.count;
			}

			TraceEvent("MemSampleSummary")
			    .detail("InverseByteSampleRatio", SAMPLE_BYTES)
			    .detail("MemorySamples", memSampleSize)
			    .detail("BackTraces", traceCounts.size())
			    .detail("TotalSize", totalSize)
			    .detail("TotalCount", totalCount);

			TraceEvent("MemSample")
			    .detail("Count", traceCounts.size())
			    .detail("TotalSize", traceCounts.size() * ((int)(sizeof(uint32_t) + sizeof(size_t) + sizeof(size_t))))
			    .detail("SampleCount", traceCounts.size())
			    .detail("Hash", "backTraces")
			    .detail("Bt", "na");

			TraceEvent("MemSample")
			    .detail("Count", memSampleSize)
			    .detail("TotalSize", memSampleSize * ((int)(sizeof(void*) + sizeof(uint32_t) + sizeof(size_t))))
			    .detail("SampleCount", memSampleSize)
			    .detail("Hash", "memSamples")
			    .detail("Bt", "na");
			TRACEALLOCATOR(16);
			TRACEALLOCATOR(32);
			TRACEALLOCATOR(64);
			TRACEALLOCATOR(96);
			TRACEALLOCATOR(128);
			TRACEALLOCATOR(256);
			TRACEALLOCATOR(512);
			TRACEALLOCATOR(1024);
			TRACEALLOCATOR(2048);
			TRACEALLOCATOR(4096);
			TRACEALLOCATOR(8192);
		}
	}
#endif
	statState->networkMetricsState = g_network->networkInfo.metrics;
	statState->networkState = netData;
	return currentStats;
}

Future<Void> startMemoryUsageMonitor(uint64_t memLimit) {
	if (memLimit == 0) {
		return Void();
	}
	auto checkMemoryUsage = [=]() {
		if (getResidentMemoryUsage() > memLimit) {
			platform::outOfMemory();
		}
	};
	return recurring(checkMemoryUsage, FLOW_KNOBS->MEMORY_USAGE_CHECK_INTERVAL);
}