FullStats.csv:
library(dplyr)

# Build FullStats.csv: download one CRAN log per day between `start` and
# `today`, reduce each log to month / package / country, and write the
# combined table to disk.
start <- as.Date("2012-10-01")
today <- as.Date("2017-01-29")
all_days <- seq(start, today, by = "day")
year <- as.POSIXlt(all_days)$year + 1900
urls <- paste0("http://cran-logs.rstudio.com/", year, "/", all_days, ".csv.gz")

# One local file per URL; the same paths are used for downloading and reading
# so the two loops cannot disagree about the file name.
log_paths <- sprintf("~/Desktop/R-projects/Rstats/temp%i.csv.gz", seq_along(urls))

for (i in seq_along(urls)) {
  # mode = "wb": the logs are gzip archives and must be written as binary.
  download.file(urls[i], log_paths[i], mode = "wb")
}

# Collect the per-day tables in a pre-allocated list and bind them once at the
# end instead of growing a data frame inside the loop.
daily_logs <- vector("list", length(urls))
pb <- txtProgressBar(min = 0, max = length(urls), style = 3)
for (i in seq_along(urls)) {
  # read.csv() reads gzip-compressed files directly, so the downloaded
  # archive does not have to be unpacked first.
  df.csv <- read.csv(log_paths[i])
  # Keep only year and month of the download date.
  df.csv$date <- strftime(as.character(df.csv$date), format = "%Y-%m")
  df.csv$country <- as.character(df.csv$country)
  df.csv$package <- as.character(df.csv$package)
  # NOTE(review): `my.package` is not defined anywhere in this script and
  # slice() selects rows by position -- confirm what was intended here.
  daily_logs[[i]] <- df.csv %>%
    dplyr::slice(my.package) %>%
    dplyr::select(date, package, country)
  setTxtProgressBar(pb, i)
}
close(pb)

packages.df <- dplyr::bind_rows(daily_logs)
# Row names are written on purpose: the packD script drops the resulting `X`
# column after reading this file back.
write.csv(packages.df, file = "~/Desktop/R-projects/Rstats/FullStats.csv")
CountryCodes2Names: First, download the country-code table from here and save it as CountryCodes2Names; then change some of the names as follows:
library(maps)
library(maptools)

# Rewrite the CountryCodes2Names lookup so that a handful of country names use
# the replacement spellings listed in `renamed` below.
codes.df <- read.csv("~/Desktop/R-projects/Rstats/CountryCodes2Names")
packDC <- as.data.frame(table(packages.df$package, packages.df$country))
names(packDC) <- c("package", "cc", "count")
pack.country <- inner_join(codes.df, packDC, by = "cc")
head(pack.country)
w2hr <- map_data("world")
names(w2hr)[5] <- "country"

# NOTE(review): `PACK.OPT` is not defined in this script; it has to exist in
# the session before this line runs.
selected.country <- pack.country[pack.country$package == PACK.OPT, ]
selected.country$full.country <- as.character(selected.country$full.country)

# Country code -> replacement name.
renamed <- c(
  US = "USA",
  GB = "UK",
  RU = "Russia",
  LY = "Libya",
  CD = "Democratic Republic of the Congo",
  CG = "Republic of Congo",
  IR = "Iran",
  LA = "Laos",
  KR = "South Korea",
  KP = "North Korea",
  CI = "Ivory Coast",
  SY = "Syria",
  TZ = "Tanzania"
)

# Recode through the lookup table in one step. A row-wise
# `df[cond, ]$col <- value` assignment stops with an error when no row matches
# the code; this form simply leaves the data untouched for absent codes.
# NOTE(review): assumes `selected.country` has a `country` column holding the
# two-letter codes, as the original assignments did -- confirm against the file.
codes <- as.character(selected.country$country)
to_rename <- codes %in% names(renamed)
selected.country$full.country[to_rename] <- unname(renamed[codes[to_rename]])

newcodes <- data.frame("cc" = selected.country$country, "country" = selected.country$full.country)
write.csv(newcodes, "~/Desktop/R-projects/Rstats/CountryCodes2Names", row.names = FALSE)
packD.csv (requires FullStats.csv):
# zoo(), rollmean() and coredata() used below come from the zoo package.
library(zoo)

# Build packD.csv: monthly download counts per package plus centred 3- and
# 5-point moving averages of those counts.
packages.df <- read.csv("~/Desktop/R-projects/Rstats/FullStats.csv")
packages.df$X <- NULL  # row-name column added by write.csv()
# FullStats stores "YYYY-MM"; append a day so the value parses as a date.
packages.df$date <- paste0(packages.df$date, "-01")
packages.df$date <- strftime(as.Date(packages.df$date, format = "%Y-%m-%d"), format = "%Y-%m-%d")

packD <- as.data.frame(table(packages.df$package, packages.df$date))
names(packD) <- c("package", "date", "count")
packD <- packD[order(packD$package), ]

# Moving average of `counts` indexed by `dates`; the positions at both ends
# that have no complete window are filled with NA so the result has the same
# length as the input.
rolling_mean <- function(counts, dates, width) {
  coredata(rollmean(zoo(counts, dates), width, fill = list(NA, NULL, NA)))
}

packD$mv <- 0
packD$mv5 <- 0
for (pkg in as.character(unique(packD$package))) {
  rows <- packD$package == pkg
  packD$mv[rows] <- rolling_mean(packD$count[rows], packD$date[rows], 3)
  packD$mv5[rows] <- rolling_mean(packD$count[rows], packD$date[rows], 5)
}

write.csv(packD, "~/Desktop/R-projects/Rstats/packD.csv")
#```{r, echo=FALSE, message=FALSE, warning=FALSE}
library(ggplot2)
library(plotly)
library(dplyr)
library(maps)
library(mapdata)
#####
# Date picker input with an extra `minviewmode` argument, which is emitted as
# the `data-date-min-view-mode` attribute of the text box.
#
# inputId                 id of the wrapping <div>.
# label                   passed to controlLabel().
# value, min, max         Date objects or strings; Date objects are converted
#                         to "yyyy-mm-dd" strings.
# format, startview, minviewmode, weekstart, language
#                         emitted unchanged as data-date-* attributes.
# width                   optional CSS width of the container.
#
# Returns the <div> tag holding the label, the text box and the date picker
# dependency.
dateInput <- function(inputId, label, value = NULL, min = NULL, max = NULL,
                      format = "yyyy-mm-dd", startview = "month", minviewmode = "days",
                      weekstart = 0, language = "en", width = NULL) {
  # Date objects become "yyyy-mm-dd" strings; anything else passes through.
  iso_string <- function(d) {
    if (inherits(d, "Date")) format(d, "%Y-%m-%d") else d
  }

  value <- restoreInput(id = inputId, default = iso_string(value))
  min <- iso_string(min)
  max <- iso_string(max)

  container_style <- if (!is.null(width)) {
    paste0("width: ", validateCssUnit(width), ";")
  }

  text_box <- tags$input(
    type = "text",
    class = "form-control",
    `data-date-language` = language,
    `data-date-week-start` = weekstart,
    `data-date-min-view-mode` = minviewmode,
    `data-date-format` = format,
    `data-date-start-view` = startview,
    `data-min-date` = min,
    `data-max-date` = max,
    `data-initial-date` = value
  )

  tags$div(
    id = inputId,
    class = "shiny-date-input form-group shiny-input-container",
    style = container_style,
    controlLabel(inputId, label),
    text_box,
    datePickerDependency
  )
}
# Return `y` when both `x` and `y` are present, otherwise NULL.
#
# A value counts as absent when it is NULL or when its first element is NA.
# Only the first element is inspected, so operands longer than one (for
# example the list-like tag objects this file passes as labels) never reach
# `&&` as a multi-element condition, which is an error from R 4.3 onwards.
`%AND%` <- function(x, y) {
  is_absent <- function(v) {
    is.null(v) || (length(v) > 0L && isTRUE(is.na(v)[[1L]]))
  }
  if (is_absent(x) || is_absent(y)) {
    return(NULL)
  }
  y
}
# Build the <label> tag for the control `controlName`.
# The result is whatever `%AND%` returns for `label` and the label tag.
controlLabel <- function(controlName, label) {
  `%AND%`(
    label,
    tags$label(class = "control-label", `for` = controlName, label)
  )
}
# HTML dependency describing the bootstrap-datepicker script and stylesheet
# used by dateInput() and dateRangeInput().
datePickerDependency <- htmlDependency(
  name = "bootstrap-datepicker",
  version = "1.6.4",
  src = c(href = "shared/datepicker"),
  script = "js/bootstrap-datepicker.min.js",
  stylesheet = "css/bootstrap-datepicker3.min.css",
  # Need to enable noConflict mode. See #1346.
  head = paste(
    "<script>",
    "(function() {",
    "var datepicker = $.fn.datepicker.noConflict();",
    "$.fn.bsDatepicker = datepicker;",
    "})();",
    "</script>",
    sep = "\n"
  )
)
# Date range input (two linked date pickers) with an extra `minviewmode`
# argument, which is emitted as the `data-date-min-view-mode` attribute of
# both text boxes.
#
# inputId                 id of the wrapping <div>.
# label                   passed to controlLabel().
# start, end, min, max    Date objects or strings; Date objects are converted
#                         to "yyyy-mm-dd" strings.
# format, startview, minviewmode, weekstart, language
#                         emitted unchanged as data-date-* attributes.
# separator               text shown between the two boxes.
# width                   optional CSS width of the container.
#
# Returns the container <div> with the date picker dependency attached.
dateRangeInput <- function(inputId, label, start = NULL, end = NULL,
                           min = NULL, max = NULL, format = "yyyy-mm-dd", startview = "month",
                           minviewmode = "days", weekstart = 0, language = "en",
                           separator = " to ", width = NULL) {
  # If start and end are date objects, convert to a string with yyyy-mm-dd format
  # Same for min and max
  if (inherits(start, "Date")) start <- format(start, "%Y-%m-%d")
  if (inherits(end, "Date")) end <- format(end, "%Y-%m-%d")
  if (inherits(min, "Date")) min <- format(min, "%Y-%m-%d")
  if (inherits(max, "Date")) max <- format(max, "%Y-%m-%d")

  restored <- restoreInput(id = inputId, default = list(start, end))
  start <- restored[[1]]
  end <- restored[[2]]

  # Both text boxes share every attribute except the initial date.
  # The week-start attribute is spelled `data-date-week-start`, matching
  # dateInput(); the previous `data-date-weekstart` spelling differed from it.
  date_box <- function(initial) {
    tags$input(
      class = "input-sm form-control",
      type = "text",
      `data-date-language` = language,
      `data-date-week-start` = weekstart,
      `data-date-min-view-mode` = minviewmode,
      `data-date-format` = format,
      `data-date-start-view` = startview,
      `data-min-date` = min,
      `data-max-date` = max,
      `data-initial-date` = initial
    )
  }

  attachDependencies(
    div(
      id = inputId,
      class = "shiny-date-range-input form-group shiny-input-container",
      style = if (!is.null(width)) paste0("width: ", validateCssUnit(width), ";"),
      controlLabel(inputId, label),
      # input-daterange class is needed for dropdown behavior
      div(
        class = "input-daterange input-group",
        date_box(start),
        span(class = "input-group-addon", separator),
        date_box(end)
      )
    ),
    datePickerDependency
  )
}
#####
# Tables the app works from.

# Raw download records; the stored date gets "-01" appended.
packages.df <- read.csv("FullStats.csv")
packages.df$date <- paste0(packages.df$date, "-01")

# Country code lookup.
codes.df <- read.csv("CountryCodes2Names")

# Packages ordered from most to least downloaded.
popular <- as.data.frame(sort(table(packages.df$package), decreasing = TRUE))
names(popular) <- c("Packages", "Downloads")

# Per-package monthly counts and moving averages.
packD <- read.csv("packD.csv")
packD$date <- strftime(as.Date(packD$date, format = "%Y-%m-%d"), format = "%Y-%m-%d")

# World map polygons; the fifth column is renamed to "country" so it can be
# joined with the download data.
w2hr <- map_data("world")
colnames(w2hr)[5] <- "country"
#```
#```{r, echo=FALSE, message=FALSE, warning=FALSE}
# Page layout: a sidebar whose controls depend on the active tab, and a main
# panel with a "Time" tab (trend plot) and a "Global" tab (map plus text).
# The pieces are built inside local() so no helper names leak out; the value
# of the block is still the fluidPage() result.
local({
  package_choices <- popular$Packages

  # Month-granularity date range picker shared by both tabs.
  month_range <- function(id) {
    dateRangeInput(
      inputId = id,
      label = h4("Date range:"),
      start = "2012-01-01",
      end = "2017-01-01",
      min = "2012-01-01",
      max = "2017-01-01",
      format = "M-yyyy",
      startview = "year",
      minviewmode = "months"
    )
  }

  # Controls shown while the "Time" tab (value 1) is active.
  time_controls <- conditionalPanel(
    condition = "input.tabvals == 1",
    selectInput(
      inputId = "package",
      label = h4("Packages:"),
      selected = package_choices[1],
      choices = package_choices,
      multiple = TRUE
    ),
    month_range("date"),
    radioButtons(
      "radio",
      label = h4("Moving average options"),
      choices = list(
        "1 point moving average" = 1,
        "3 point moving average" = 3,
        "5 point moving average" = 5
      ),
      selected = 1
    )
  )

  # Controls shown while the "Global" tab (value 2) is active.
  map_controls <- conditionalPanel(
    condition = "input.tabvals == 2",
    selectInput(
      inputId = "packagemap",
      label = h4("Package:"),
      selected = package_choices[1],
      choices = package_choices,
      multiple = FALSE
    ),
    month_range("datemap")
  )

  result_tabs <- tabsetPanel(
    id = "tabvals",
    tabPanel("Time", plotlyOutput("plot"), value = 1),
    tabPanel(
      "Global",
      plotlyOutput("map"),
      verbatimTextOutput("extraText"),
      value = 2
    )
  )

  fluidPage(
    titlePanel("Packages downloaded from the CRAN repository"),
    sidebarLayout(
      sidebarPanel(time_controls, map_controls),
      mainPanel(result_tabs)
    )
  )
})
### TIME
# Trend plot for the packages selected on the "Time" tab.
# Reads input$package, input$date and input$radio; draws one line per package
# using the raw counts (`count`) or the 3- / 5-point moving average columns
# (`mv`, `mv5`) of `packD`.
output$plot = renderPlotly({
  # Nothing to draw until at least one package is selected.
  req(input$package)

  # packD$date holds "YYYY-MM-DD" strings, so the range check is done on text.
  from <- strftime(min(input$date), format = "%Y-%m-%d")
  to <- strftime(max(input$date), format = "%Y-%m-%d")
  packDS <- packD[packD$date >= from & packD$date <= to, ]
  packDS$date <- as.Date(packDS$date, format = "%Y-%m-%d")
  packDS <- packDS[packDS$package %in% input$package, ]

  # The three radio options differ only in the plotted column and the title.
  series <- switch(
    as.character(input$radio),
    "1" = list(y = aes(y = count), title = "Package Trends using raw downloads"),
    "3" = list(y = aes(y = mv), title = "Package Trends using a 3 point moving average"),
    "5" = list(y = aes(y = mv5), title = "Package Trends using a 5 point moving average")
  )
  if (is.null(series)) {
    return(NULL)
  }

  trend_plot <- ggplot(packDS, aes(x = date)) +
    series$y +
    geom_line(aes(group = package, color = package)) +
    ggtitle(series$title) +
    ylab("Number of downloads") +
    xlab("Date") +
    scale_x_date(date_labels = "%b %y", date_breaks = "3 months") +
    theme_minimal() +
    theme(legend.position = "none",
          axis.text.x = element_text(angle = 90, hjust = 1))

  # Convert this plot explicitly rather than whatever ggplot was built last.
  ggplotly(trend_plot)
})
### MAP
# World map of downloads for the package chosen on the "Global" tab.
# Reads input$packagemap and input$datemap. Countries with data are filled by
# download count; every other polygon of `w2hr` is drawn in grey.
output$map = renderPlotly({
  # Work on a local copy with dates normalised to "YYYY-MM-DD" strings.
  packages.df$date <- strftime(as.Date(packages.df$date, format = "%Y-%m-%d"), format = "%Y-%m-%d")
  from <- strftime(min(input$datemap), format = "%Y-%m-%d")
  to <- strftime(max(input$datemap), format = "%Y-%m-%d")
  tmp <- packages.df[packages.df$date >= from & packages.df$date <= to, ]

  # Downloads per package and country code, with country names attached.
  packDC <- as.data.frame(table(tmp$package, tmp$country))
  names(packDC) <- c("package", "cc", "count")
  pack.country <- inner_join(codes.df, packDC, by = "cc")
  selected.country <- pack.country[pack.country$package == input$packagemap, ]

  # Map polygons with data, and the remaining polygons without a match.
  pack.country.df <- inner_join(w2hr, selected.country, by = "country")
  empty.df <- w2hr[is.na(match(w2hr$country, as.character(pack.country.df$country))), ]

  world_map <- ggplot(data = pack.country.df, aes(x = long, y = lat, group = group)) +
    geom_polygon(aes(fill = count, text = paste("Country:", country)), color = "white", size = 0.1) +
    geom_polygon(data = empty.df, aes(x = long, y = lat, group = group, text = paste("Country:", country)), fill = "grey", color = "white", size = 0.1) +
    # The title names the package whose data is drawn (input$packagemap), not
    # the selection of the "Time" tab.
    ggtitle(paste("Number of downloads of package:", input$packagemap, "across the world")) +
    scale_fill_distiller(palette = "Spectral", name = "Number of downloads") +
    theme_minimal() +
    theme(
      axis.text = element_blank(),
      axis.line = element_blank(),
      axis.ticks = element_blank(),
      panel.border = element_blank(),
      panel.grid = element_blank(),
      axis.title = element_blank(),
      plot.title = element_text(hjust = 0.5),
      legend.position = "none"
    )

  # Convert this plot explicitly rather than whatever ggplot was built last.
  ggplotly(world_map)
})
### MAP TEXT
# Text under the map: download counts of the selected package for the four
# codes that are reported separately ("A1", "A2", "EU", "AP").
# Reads input$packagemap and input$datemap.
output$extraText = renderText({
  # packages.df$date holds "YYYY-MM-DD" strings. Parse them with the matching
  # format ("%M" is minutes, so a "%Y-%M" format never reads the month) and
  # reduce them to "YYYY-MM" for the month-level range check.
  packages.df$date <- strftime(as.Date(packages.df$date, format = "%Y-%m-%d"), format = "%Y-%m")
  from <- strftime(min(input$datemap), format = "%Y-%m")
  to <- strftime(max(input$datemap), format = "%Y-%m")
  tmp <- packages.df[packages.df$date >= from & packages.df$date <= to, ]

  packDC <- as.data.frame(table(tmp$package, tmp$country))
  names(packDC) <- c("package", "cc", "count")
  pack.country <- inner_join(codes.df, packDC, by = "cc")
  selected.country <- pack.country[pack.country$package == input$packagemap, ]

  # Download count recorded for one code.
  count_for <- function(code) {
    selected.country[selected.country$cc == code, ]$count
  }

  paste(paste("Anonymous Proxy:", count_for("A1")),
        paste("Satellite Provider:", count_for("A2")),
        paste("EU blocked IP:", count_for("EU")),
        paste("Asia blocked IP:", count_for("AP")),
        sep = "\n")
})
#```
Name: Andy Challis
Email: andrewchallis@hotmail.co.uk
LinkedIn: uk.linkedin.com/in/achallis