MATH 2270 Data Visualisation - Assignment 3

Source URL

Data Source

Visualisation URL

Code

library(readr)
library(ggplot2)
library(networkD3)
library(superheat)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.2     v dplyr   1.0.6
## v tidyr   1.1.3     v stringr 1.4.0
## v purrr   0.3.4     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(treemap)
## This version of bslib is designed to work with shiny version 1.6.0 or higher.
# Table 2.4: All Students by State of Permanent Home Residence and State of
# Higher Education Institution of Study, Full Year 2019.
# The "State" column is turned into the row names (index) while importing.
MultiState <- read.csv(
  file = "MultiState.csv",
  header = TRUE,
  row.names = "State",
  fileEncoding = "UTF-8-BOM"
)

# Table 2.11: All Students by State, Higher Education Institution and Gender,
# Indeterminate/Intersex/Unspecified(a), Full Year 2019.
Uni_by_Gender <- read.csv(
  file = "Uni_by_Gender.csv",
  fileEncoding = "UTF-8-BOM"
)

# Table 2.1: All Students by Age Group and Broad Level of Course, Full Year 2019.
UniAgeGroup <- read.csv(
  file = "UniAgeGroup.csv",
  fileEncoding = "UTF-8-BOM"
)

#Column {data-width=350}

Age-group-wise course-level enrollment - 2019

# Treemap of enrollment counts by age group and course level ----
# UniAgeGroup is already read from "UniAgeGroup.csv" by the data-import step
# earlier in this file, so it is not re-read from disk here.
#
# The data frame is expected to provide the columns "group", "subgroup" and
# "value" (they are referenced by name below).
p <- treemap(
  UniAgeGroup,
  # Nesting levels of the rectangles. "value" is listed as the innermost level
  # as well as the size variable -- presumably so that each leaf is labelled
  # with its own count (NOTE(review): confirm this is intentional).
  index = c("group", "subgroup", "value"),
  vSize = "value",     # rectangle area is driven by the "value" column
  type = "index",      # colours are assigned from the index variables
  palette = "Set3",
  bg.labels = "white",
  # One alignment pair (horizontal, vertical) per entry; only two entries are
  # supplied for the three index levels above.
  align.labels = list(
    c("center", "center"),
    c("right", "bottom")
  )
)
## Warning in if (class(try(col2rgb(bg.labels), silent = TRUE)) == "try-error")
## stop("Invalid bg.labels"): the condition has length > 1 and only the first
## element will be used


Student movement between the states - 2019

# Sankey diagram ----
# Reshape the state-by-state table into one row per flow: the row name becomes
# "source", the column name becomes "target" and the cell becomes "value".
# gather() stacks the columns one after another, which fixes the order of the
# links handed to the widget. Cells that are not positive are dropped.
data_long <- MultiState %>%
  rownames_to_column("source") %>%
  gather(key = "target", value = "value", -source) %>%
  filter(value > 0)

# Append a trailing space to every target so that a state used as a target gets
# a node name distinct from the same state used as a source.
data_long$target <- paste0(data_long$target, " ")

# Node table: every distinct name that takes part in a flow, sources first.
nodes <- data.frame(
  name = unique(c(as.character(data_long$source), as.character(data_long$target)))
)

# networkD3 refers to nodes by zero-based position in the node table, hence
# the "- 1" after the (one-based) match().
data_long$IDsource <- match(data_long$source, nodes$name) - 1
data_long$IDtarget <- match(data_long$target, nodes$name) - 1

# Colour scale, passed to the widget as a JavaScript (D3) expression.
ColourScal <- 'd3.scaleOrdinal() .range(["#FDE725FF","#B4DE2CFF","#6DCD59FF","#35B779FF","#1F9E89FF","#26828EFF","#31688EFF","#3E4A89FF","#482878FF","#440154FF"])'

# Build the network.
sankeyNetwork(
  Links = data_long,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "value",
  NodeID = "name",
  sinksRight = FALSE,
  colourScale = ColourScal,
  nodeWidth = 40,
  fontSize = 13,
  nodePadding = 20
)
#Column {data-width=350}

Heat map of student movement between the states - 2019

# Heat map of the state-by-state student table ----
# scale = TRUE asks superheat to scale the data before colouring
# (NOTE(review): per the superheat docs this standardises columns -- confirm
# that column-wise scaling is what the chart is meant to show).
superheat(
  MultiState,
  scale = TRUE,
  left.label.text.size = 3,
  bottom.label.text.size = 3,
  bottom.label.size = 0.05
)

Student enrollment by sex in each state - 2019

# Enrollment by sex in each state ----
# Only the Male and Female columns are totalled; the unspecified-sex column is
# left out because its numbers are negligible.
# across() keeps the original column names, so the result already has the
# columns State, Male and Female and no positional renaming is needed.
StatesSexCount <- Uni_by_Gender %>%
  group_by(State) %>%
  summarise(across(c(Male, Female), sum))

# Change to long format: one row per State/Sex pair with the count in "Number".
Long_TestStateGen <- StatesSexCount %>%
  pivot_longer(
    cols = c(Male, Female),
    names_to = "Sex",
    values_to = "Number"
  )

# Grouped (dodged) bar plot: one bar per sex within each state.
ggplot(data = Long_TestStateGen, aes(x = State, y = Number, fill = Sex)) +
  geom_col(position = position_dodge()) +
  scale_fill_viridis_d()

Reference