It is not too easy to spot days that have an unusually high rate of newly added relays by looking at network size. What about a new graph that shows only the number of new unique relay fingerprints first seen on a given day? [1]
Using package dependency management
library(readr)
library(dplyr)
library(ggplot2)
library(tidyr)
library(scales)

# Assume bridges & relays variables have unique fingerprints
# Day-over-day change in the relay and bridge counts, in long format.
#
# Reads the Tor Metrics servers CSV and keeps only the rows where flag,
# country, version, platform and ec2bridge are all NA (presumably the
# network-wide totals -- confirm against the CSV specification). The result
# has one row per date and device ("new_bridges" / "new_relays") with the
# difference to the previous row in `count`; the first date has NA counts.
servers <- read_csv("https://metrics.torproject.org/stats/servers.csv") %>%
  filter(
    is.na(flag) & is.na(country) & is.na(version) & is.na(platform) &
      is.na(ec2bridge)
  ) %>%
  select(date, bridges, relays) %>%
  mutate(
    bridges = as.integer(bridges),
    date = as.Date(date)
  ) %>%
  # Sort before differencing so lag() always refers to the preceding date,
  # regardless of the row order in the downloaded file.
  arrange(date) %>%
  mutate(
    new_relays = relays - lag(relays),
    new_bridges = bridges - lag(bridges)
  ) %>%
  select(date, new_bridges, new_relays) %>%
  pivot_longer(-date, names_to = "device", values_to = "count") %>%
  arrange(date)

# network_changes() - function to plot network changes
# Plot the daily change in relay and bridge counts and save it as a PNG.
#
# Arguments:
#   start  First date to plot; a Date or a string that as.Date() can parse.
#   end    Last date to plot; capped at two days before the current date.
#   path   File path of the PNG to write. Its directory is created if missing.
#   data   Long data frame with `date`, `device` and `count` columns;
#          defaults to the global `servers`.
#
# Returns the value of ggsave().
network_changes <- function(start, end, path, data = servers) {
  # Compare as Dates, not strings: string comparison mis-orders inputs that
  # are not zero-padded (e.g. "2017-5-25" sorts after "2017-10-01").
  end <- min(as.Date(end), Sys.Date() - 2)
  start <- as.Date(start)

  p <- data %>%
    filter(date >= start & date <= end) %>%
    ggplot(aes(date, count, color = device)) +
    geom_line(size = 0.8) +
    scale_x_date(labels = date_format("%b-%Y")) +
    scale_colour_hue("", breaks = c("new_relays", "new_bridges")) +
    ggtitle("Change in Daily Number of Relays\n") +
    theme(plot.title = element_text(hjust = 0.5)) +
    labs(y = "", x = "\nThe Tor Project - https://metrics.torproject.org")

  # Make sure the target directory exists so ggsave() does not fail on a
  # fresh checkout.
  dir.create(dirname(path), showWarnings = FALSE, recursive = TRUE)

  # Pass the plot explicitly instead of relying on last_plot().
  ggsave(path, plot = p, device = "png", width = 8, height = 5, dpi = 72)
}

network_changes("2007-10-27", "2017-5-25", "./images/newrelays1.png")
network_changes("2016-6-1", "2017-5-25", "./images/newrelays2.png")
network_changes("2015-1-1", "2015-5-25", "./images/newrelays3.png")
network_changes("2013-1-1", "2016-12-31", "./images/newrelays4.png")
network_changes("2012-5-1", "2014-6-30", "./images/newrelays5.png")