5 students were not classified due to lack of information in the pdf file.

OR Map

Subject Distribution

Source Code

knitr::opts_chunk$set(echo = TRUE)
library(shiny)
library(leaflet)
library(htmlwidgets)
library(tidyverse)
library(htmltools)
library(rsconnect)
library(leaflet.extras)
library(stringr)
library(ggthemes)
library(plotly)

###################################################################################
####  The Spatial Dataframe contains the coordinates for the country polygons #####
###################################################################################

hlink <- 'http://www.econworks.co/uploads/9/6/1/6/96165748/02_countries_spdf.rds' 
spatial_data <- read_rds(gzcon(url(hlink)))
########################################################################################
#### Trim down the initial Spatial Dataframe  and load the OR Students pdf file ########
########################################################################################

## Load the PDF file with the OR students and parse the relevant data
pdf <- pdftools::pdf_text(choose.files())
country_names <- spatial_data@data[["name"]]

## Count the number of occurences of a given country string
country_index <- str_count(pdf, pattern = country_names)

## Insert the values within the Spatial Dataframe 
spatial_data@data[["number"]] <- country_index

## Extract only the countries with more than 0 students
student_data <- spatial_data[spatial_data$number > 0, ]
  
## Find the total number of students in the program
num_students <- sum(country_index)

## Create a bin for each unique number of students ;
### The labels need to be located after the '% share' computation has been performed
country_vector <- country_index[country_index > 0]
bins = sort(unique(country_vector))

## Create a color palette for the country polygons  
pal <- colorFactor("YlOrRd", domain = student_data$number, reverse = FALSE)
  
## Labels for the country shares  
country_shares <- paste0(round(100*(student_data$number/num_students), digits = 0), "%")
  
## Use C formatting for the data displayed on mouse hover
labels <- sprintf(
    "<strong>%s</strong><br/># Students: %g <br/> Share: %s",
    student_data$name, student_data$number, country_shares) %>% lapply(HTML)
  
##############################################################
##### Get the share of students from each category  ##########
#############################################################

## Initialize a list to store the values
freqs <- list()
groups_frequency <- for(i in bins){ 
                        freqs[i] <- table(country_vector == i)[2][[1]]}
  
## The freqs variable contains the frequency of each group. Now we need to unlist it
freqs <- unlist(freqs)
group_shares <- round(100*(bins * freqs)/num_students, digits = 0)


### Create labels for the group share via a loop
groups_num <- length(group_shares) ## there are 5 groups

## Initialize an empty vector of length equal to the number of elements in vector groups_num (5)
groups <- vector(length = groups_num)
  
   for (i  in seq_along(1:groups_num)) {
      
   groups[i] <- c(paste0(" : (Group Share: ", group_shares[i], "%)"))
    
  }  
##############################################################
##########    Setting up the Leaflet Map      ################
#############################################################

### Create a custom framework for the leaflet map
  newPolicy <- sizingPolicy(defaultWidth = '100%', defaultHeight = 600, 
                            padding = 5, knitr.figure = TRUE)
  
  
  student_data %>%
    leaflet(sizingPolicy = newPolicy)  %>% 
    addProviderTiles("CartoDB.DarkMatter", 
                     options = providerTileOptions(minZoom = 1, maxZoom = 10 
                     ))  %>% 
    
    ## Position the market at the geolocation of Edinburgh, UK
    addMarkers(lng = -3.1883, lat = 55.9533, label = ~"Edinburgh",
               labelOptions = labelOptions(textsize = "14px") ) %>%
    setView(lng = -3.1883, lat = 40, zoom = 2) %>%
    addPolygons(
      fillColor = ~pal(number),
      weight = 2,
      opacity = 1,
      color = "white",
      dashArray = "3",
      fillOpacity = 0.7,
      highlight = highlightOptions(
        weight = 5,
        color = "#FF9900",
        dashArray = "",
        fillOpacity = 0.7,
        bringToFront = TRUE),
      label = labels,
      labelOptions = labelOptions(
        style = list("font-weight" = "normal", padding = "2px 6px"),
        textsize = "15px",
        direction = "auto")) %>%
    addLegend(pal = pal, values = ~number, opacity = 0.99, 
              title = paste0("# OR Students: ", num_students), 
              labFormat = labelFormat(suffix = groups),
              position = "bottomright") 
  
  




##############################################################
##########    Subject Specific Distribution  ################
#############################################################

### Distributions by subject group : OR, ORwDS, ORwR, ORwCO
### Use OR as a reference group  
groups <- c("ORwDS", "ORwR", "ORwCO")
subject_groups <- str_count(pdf, pattern = groups)
OR <- num_students - sum(subject_groups)
subject_groups <- data.frame(Group = c("OR", groups), 
                             Number = c(OR, subject_groups))

subject_groups <- subject_groups %>%
                  mutate(Share =round(100*(Number/sum(Number)), digits = 1))



graph <- plot_ly(subject_groups, labels = ~Group, values = ~Number, type = "pie",
            textposition = 'inside',
            textinfo = 'label + percent',
            insidetextfont = list(color = '#FFFFFF'),
            hoverinfo = 'label',
            text = ~paste('# Students: ', Number),
            marker = list(colors = colors,
                          line = list(color = '#FFFFFF', width = 2)),
#The 'pull' attribute can also be used to create space between the sectors
      showlegend = TRUE) %>%
        layout(title = 'Operational Research Students Count')

ggplotly(graph)