5 students were not classified due to lack of information in the pdf file.
knitr::opts_chunk$set(echo = TRUE)
library(shiny)
library(leaflet)
library(htmlwidgets)
library(tidyverse)
library(htmltools)
library(rsconnect)
library(leaflet.extras)
library(stringr)
library(ggthemes)
library(plotly)
###################################################################################
#### The Spatial Dataframe contains the coordinates for the country polygons #####
###################################################################################
hlink <- 'http://www.econworks.co/uploads/9/6/1/6/96165748/02_countries_spdf.rds'
spatial_data <- read_rds(gzcon(url(hlink)))
########################################################################################
#### Trim down the initial Spatial Dataframe and load the OR Students pdf file ########
########################################################################################
## Load the PDF file with the OR students and parse the relevant data
pdf <- pdftools::pdf_text(choose.files())
country_names <- spatial_data@data[["name"]]
## Count the number of occurences of a given country string
country_index <- str_count(pdf, pattern = country_names)
## Insert the values within the Spatial Dataframe
spatial_data@data[["number"]] <- country_index
## Extract only the countries with more than 0 students
student_data <- spatial_data[spatial_data$number > 0, ]
## Find the total number of students in the program
num_students <- sum(country_index)
## Create a bin for each unique number of students ;
### The labels need to be located after the '% share' computation has been performed
country_vector <- country_index[country_index > 0]
bins = sort(unique(country_vector))
## Create a color palette for the country polygons
pal <- colorFactor("YlOrRd", domain = student_data$number, reverse = FALSE)
## Labels for the country shares
country_shares <- paste0(round(100*(student_data$number/num_students), digits = 0), "%")
## Use C formatting for the data displayed on mouse hover
labels <- sprintf(
"<strong>%s</strong><br/># Students: %g <br/> Share: %s",
student_data$name, student_data$number, country_shares) %>% lapply(HTML)
##############################################################
##### Get the share of students from each category ##########
#############################################################
## Initialize a list to store the values
freqs <- list()
groups_frequency <- for(i in bins){
freqs[i] <- table(country_vector == i)[2][[1]]}
## The freqs variable contains the frequency of each group. Now we need to unlist it
freqs <- unlist(freqs)
group_shares <- round(100*(bins * freqs)/num_students, digits = 0)
### Create labels for the group share via a loop
groups_num <- length(group_shares) ## there are 5 groups
## Initialize an empty vector of length equal to the number of elements in vector groups_num (5)
groups <- vector(length = groups_num)
for (i in seq_along(1:groups_num)) {
groups[i] <- c(paste0(" : (Group Share: ", group_shares[i], "%)"))
}
##############################################################
########## Setting up the Leaflet Map ################
#############################################################
### Create a custom framework for the leaflet map
newPolicy <- sizingPolicy(defaultWidth = '100%', defaultHeight = 600,
padding = 5, knitr.figure = TRUE)
student_data %>%
leaflet(sizingPolicy = newPolicy) %>%
addProviderTiles("CartoDB.DarkMatter",
options = providerTileOptions(minZoom = 1, maxZoom = 10
)) %>%
## Position the market at the geolocation of Edinburgh, UK
addMarkers(lng = -3.1883, lat = 55.9533, label = ~"Edinburgh",
labelOptions = labelOptions(textsize = "14px") ) %>%
setView(lng = -3.1883, lat = 40, zoom = 2) %>%
addPolygons(
fillColor = ~pal(number),
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
fillOpacity = 0.7,
highlight = highlightOptions(
weight = 5,
color = "#FF9900",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "2px 6px"),
textsize = "15px",
direction = "auto")) %>%
addLegend(pal = pal, values = ~number, opacity = 0.99,
title = paste0("# OR Students: ", num_students),
labFormat = labelFormat(suffix = groups),
position = "bottomright")
##############################################################
########## Subject Specific Distribution ################
#############################################################
### Distributions by subject group : OR, ORwDS, ORwR, ORwCO
### Use OR as a reference group
groups <- c("ORwDS", "ORwR", "ORwCO")
subject_groups <- str_count(pdf, pattern = groups)
OR <- num_students - sum(subject_groups)
subject_groups <- data.frame(Group = c("OR", groups),
Number = c(OR, subject_groups))
subject_groups <- subject_groups %>%
mutate(Share =round(100*(Number/sum(Number)), digits = 1))
graph <- plot_ly(subject_groups, labels = ~Group, values = ~Number, type = "pie",
textposition = 'inside',
textinfo = 'label + percent',
insidetextfont = list(color = '#FFFFFF'),
hoverinfo = 'label',
text = ~paste('# Students: ', Number),
marker = list(colors = colors,
line = list(color = '#FFFFFF', width = 2)),
#The 'pull' attribute can also be used to create space between the sectors
showlegend = TRUE) %>%
layout(title = 'Operational Research Students Count')
ggplotly(graph)