# Load all necessary packages up front
library(scales)
library(gridExtra)
library(ggplot2)
library(lubridate)
library(readr)
library(tidyr)
library(dplyr)
# Clear the environment and set the locations of the datasets
# NOTE(review): rm(list = ls()) removes every non-hidden object from the
# current environment (the caller's workspace when the script is sourced at
# top level) but does not detach packages or reset options. Consider dropping
# it and running the script in a fresh R session instead.
rm(list = ls())
# Directory holding the ICEWS event files; passed to load_icews_events() below.
ICEWS_DATASET <- "~/Projects/koala/results/DatasetCollectorPipeline/events"
# Path to the GED CSV file; passed to read_csv() below.
GED_DATASET <- "~/Datasets/ucdp/ged/ged30.csv"
# Define a function to plot the data
# Draw a line chart of the number of rows per date.
#
# Args:
#   title: Title shown above the plot.
#   data:  Data frame with a `Date` column that `as.Date()` can convert;
#          every row counts as one event.
#
# Returns:
#   The ggplot object for the chart.
make_plot <- function(title, data) {
  # Reduce the input to one row per calendar date with its row count.
  dated <- mutate(data, Date = as.Date(Date))
  per_date <- summarise(group_by(dated, Date), Count = n())

  # Tilted tick labels on the x axis; the theme blanks both axis titles,
  # including the name given to the y scale below.
  axis_theme <- theme(
    axis.text.x = element_text(angle = 30),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
  year_axis <- scale_x_date(
    date_breaks = "2 years",
    date_minor_breaks = "1 year",
    date_labels = "%Y"
  )
  count_axis <- scale_y_continuous(name = "Num Events", labels = comma)

  ggplot(per_date, aes(Date, Count)) +
    ggtitle(title) +
    geom_line(color = "steelblue") +
    axis_theme +
    year_axis +
    count_axis
}
# Create the plots and display them in a grid
# Plot event counts at daily, weekly, monthly and yearly resolution.
#
# Args:
#   events: Data frame with an `EventDate` column holding dates or
#           date-times (it is passed to `year()`, `month()` and date
#           arithmetic); every row counts as one event.
#
# Returns:
#   The value of `grid.arrange()`, called with the four plots and `ncol = 2`.
plot_trends <- function(events) {
  if (!"EventDate" %in% names(events)) {
    stop("`events` must contain an `EventDate` column", call. = FALSE)
  }

  # Weeks are counted from the earliest event. Missing dates are skipped here
  # so that a single NA does not turn every week index into NA.
  first_event_date <- min(events$EventDate, na.rm = TRUE)

  # create variables for year, month and week from the date field
  events <- events %>%
    mutate(
      Year = year(EventDate),
      Month = month(EventDate),
      # Request days explicitly and convert to a plain number: raw
      # subtraction yields a difftime whose unit is only guaranteed to be
      # days for Date input, and weeks() below expects a numeric count.
      Week = floor(
        as.numeric(difftime(EventDate, first_event_date, units = "days")) / 7
      ) + 1
    )

  daily <- make_plot("Daily", mutate(events, Date = EventDate))
  # Week is 1-based, so subtract one to stamp each bucket with its first day.
  # This matches the monthly and yearly series, which are stamped with the
  # first day of their period.
  weekly <- make_plot(
    "Weekly",
    mutate(events, Date = first_event_date + weeks(Week - 1))
  )
  monthly <- make_plot("Monthly", mutate(events, Date = ISOdate(Year, Month, 1)))
  yearly <- make_plot("Yearly", mutate(events, Date = ISOdate(Year, 1, 1)))

  grid.arrange(daily, weekly, monthly, yearly, ncol = 2)
}
# Define a function to load ICEWS events
# Load every matching event file in a directory into one data frame.
#
# Args:
#   path:    Directory to search (sub-directories are not searched).
#   pattern: Regular expression matched against file names; the default
#            accepts names of the form `events.<4 digits>.csv`.
#
# Returns:
#   The `bind_rows()` concatenation of all matching files, each read with
#   `read_csv()`. When nothing matches, a warning is raised and the result of
#   `bind_rows()` on an empty list is returned, as before.
load_icews_events <- function(path, pattern = "^events\\.\\d{4}\\.csv$") {
  files <- list.files(path, pattern = pattern, full.names = TRUE)

  # A wrong directory or pattern would otherwise produce a column-less data
  # frame that only fails later, far away from the real cause.
  if (length(files) == 0L) {
    warning(
      "No files matching '", pattern, "' found in '", path, "'",
      call. = FALSE
    )
  }

  bind_rows(lapply(files, read_csv, progress = FALSE))
}
# Plot event-count trends for the ICEWS event files.
plot_trends(load_icews_events(ICEWS_DATASET))

# Plot the same trends for the GED file; its `date_start` column is renamed
# to the `EventDate` name that plot_trends() reads.
plot_trends(rename(read_csv(GED_DATASET), EventDate = date_start))