Code
library(readr)
library(ggplot2)
library(networkD3)
library(superheat)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.2 v dplyr 1.0.6
## v tidyr 1.1.3 v stringr 1.4.0
## v purrr 0.3.4 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(treemap)
## This version of bslib is designed to work with shiny version 1.6.0 or higher.
# Table 2.4: All Students by State of Permanent Home Residence and State of
# Higher Education Institution of Study, Full Year 2019.
# The "State" column is used as the row names (index) while importing.
MultiState <- read.csv(
  "MultiState.csv",
  fileEncoding = "UTF-8-BOM",
  header = TRUE,
  row.names = "State"
)

# Table 2.11: All Students by State, Higher Education Institution and Gender,
# Indeterminate/Intersex/Unspecified(a), Full Year 2019.
Uni_by_Gender <- read.csv("Uni_by_Gender.csv", fileEncoding = "UTF-8-BOM")

# Table 2.1: All Students by Age Group and Broad Level of Course,
# Full Year 2019.
UniAgeGroup <- read.csv("UniAgeGroup.csv", fileEncoding = "UTF-8-BOM")
#Column {data-width=350}
Course-level enrollment by age group - 2019
# Treemap of enrolments, nested by the "group", "subgroup" and "value" columns
# of UniAgeGroup, with rectangle sizes taken from "value".
# UniAgeGroup is already loaded by the data-import step earlier in this file,
# so the CSV is not read a second time here.
# NOTE(review): the rendered output shows treemap warning that the condition
# in its bg.labels check "has length > 1"; the arguments are left unchanged.
p <- treemap(
  UniAgeGroup,
  index = c("group", "subgroup", "value"),
  vSize = "value",
  type = "index",
  palette = "Set3",
  bg.labels = c("white"),
  align.labels = list(
    c("center", "center"),
    c("right", "bottom")
  )
)
## Warning in if (class(try(col2rgb(bg.labels), silent = TRUE)) == "try-error")
## stop("Invalid bg.labels"): the condition has length > 1 and only the first
## element will be used

Student movement between the states - 2019
# Sankey diagram
# Reshape the state-by-state table into one row per (row name, column name)
# pair and keep only the pairs with a positive count.
# gather() is kept so the rows stay in the same column-by-column order.
data_long <- MultiState %>%
  rownames_to_column() %>%
  gather(key = "key", value = "value", -rowname) %>%
  filter(value > 0)
names(data_long) <- c("source", "target", "value")

# Append a space to every target label; presumably this keeps a target node
# distinct from a source node with the same state name -- TODO confirm.
data_long$target <- paste0(data_long$target, " ")

# Node table: every distinct entity that appears in a flow, sources first.
nodes <- data.frame(
  name = unique(
    c(as.character(data_long$source), as.character(data_long$target))
  )
)

# networkD3 expects links to reference nodes by zero-based position.
data_long$IDsource <- match(data_long$source, nodes$name) - 1
data_long$IDtarget <- match(data_long$target, nodes$name) - 1

# Colour scale, passed to sankeyNetwork() as a D3 JavaScript expression.
ColourScal <- 'd3.scaleOrdinal() .range(["#FDE725FF","#B4DE2CFF","#6DCD59FF","#35B779FF","#1F9E89FF","#26828EFF","#31688EFF","#3E4A89FF","#482878FF","#440154FF"])'

# Draw the network.
sankeyNetwork(
  Links = data_long,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "value",
  NodeID = "name",
  sinksRight = FALSE,
  colourScale = ColourScal,
  nodeWidth = 40,
  fontSize = 13,
  nodePadding = 20
)
#Column {data-width=350}
Heat map of student movement between the states - 2019
# Heat map of the MultiState table, with scale = TRUE and small axis labels.
# The stray trailing comma after the last argument has been removed: it passed
# an extra empty argument to superheat().
superheat(
  MultiState,
  scale = TRUE,
  left.label.text.size = 3,
  bottom.label.text.size = 3,
  bottom.label.size = 0.05
)

Student enrollment by sex in each state - 2019
# Total Male and Female enrolments per state. Students with an unspecified sex
# are left out because their numbers are negligible.
# across() keeps the original column names, so no positional renaming is
# needed afterwards.
StatesSexCount <- Uni_by_Gender %>%
  group_by(State) %>%
  summarise(across(c(Male, Female), sum), .groups = "drop")

# Reshape to long format: one row per state and sex, with the count in
# "Number". Rows come out state by state rather than sex by sex; the plot
# below does not depend on row order.
Long_TestStateGen <- StatesSexCount %>%
  pivot_longer(c(Male, Female), names_to = "Sex", values_to = "Number")

# Grouped bar chart: one pair of bars per state, coloured by sex.
ggplot(data = Long_TestStateGen, aes(x = State, y = Number, fill = Sex)) +
  geom_col(position = position_dodge()) +
  scale_fill_viridis_d()
