library("ggplot2")
library("dplyr")
library("tidyr")
#' Turn raw `pidstat -h` output into lines that `read.table()` can parse.
#'
#' The first line starting with "#" is treated as the column header; its "#"
#' prefix and the padding after it are removed. The result is that header
#' followed by every later line that is neither blank nor another "#" header
#' line. Lines before the first header (such as a leading banner line) are
#' discarded.
#'
#' @param filename Path to a text file containing `pidstat -h` output.
#' @return A character vector: the cleaned header line, then the data lines.
cleanup_pidstat_h_file <- function(filename) {
  raw <- readLines(filename)

  is_header <- grepl("^#", raw)
  if (!any(is_header)) {
    stop(
      "No header line starting with '#' found in ", filename,
      call. = FALSE
    )
  }

  # Locate the header by pattern instead of assuming it sits on line 3, and
  # strip "#" plus however much padding follows it instead of a fixed seven
  # characters, so a differently padded header keeps its first column name.
  header_idx <- which(is_header)[1]
  header_line <- sub("^#[[:space:]]*", "", raw[header_idx])

  # Logical masks (rather than negative indices from grep()) stay correct
  # even when a pattern matches nothing.
  after_header <- seq_along(raw) > header_idx
  data_lines <- raw[after_header & nzchar(raw) & !is_header]

  c(header_line, data_lines)
}
# Load one pidstat capture and parse the cleaned lines into a data frame.
filename <- "statsgod_pidstat_1422995277.txt"
lines <- cleanup_pidstat_h_file(filename)
df <- read.table(text = lines, header = TRUE)
# Interpret the Time column as seconds since the Unix epoch.
df$Time <- as.POSIXct(df$Time, origin = "1970-01-01")

# Long format: one row per (Time, PID, Command, metric) so several metrics can
# share a single plot. pivot_longer() replaces the superseded gather().
df_long <- df %>%
  pivot_longer(
    cols = -c(Time, PID, Command),
    names_to = "metric",
    values_to = "value"
  )
# CPU usage over time, one stacked panel per metric. The "X." names are what
# read.table() produces from the "%"-prefixed pidstat column headers.
df_long %>%
  filter(metric %in% c("X.CPU", "X.usr", "X.system")) %>%
  ggplot(aes(Time, value, colour = metric)) +
  # alpha is a fixed setting, not a data mapping: inside aes() it would add a
  # spurious legend entry and leave the actual opacity to the alpha scale.
  geom_line(alpha = 0.4) +
  ylab("% CPU") +
  ggtitle("CPU usage") +
  facet_wrap(~metric, ncol = 1)

# Memory usage over time: the RSS column divided by 1024, drawn as a line.
ggplot(data = df, mapping = aes(x = Time, y = RSS / 1024)) +
  geom_line() +
  labs(title = "Memory usage", y = "RSS in mb")
