There has been discussion about US university endowments and how the richest universities are doing less to help low-income students. I decided to scrape university endowment data from Wikipedia and plot the 25 largest endowments.
suppressMessages(library("rvest"))
suppressMessages(library("dplyr"))
suppressMessages(library("ggplot2"))
# Scrape the aggregate endowment table from Wikipedia and draw a bar chart of
# the 25 largest endowments.
url <- "https://en.wikipedia.org/wiki/List_of_colleges_and_universities_in_the_United_States_by_endowment"

# html_table() returns a list with one table per node matched by the XPath.
endowment <- url %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/table[1]') %>%
  html_table()

# Fail early with a readable message instead of an "undefined columns" error
# further down when the XPath no longer matches anything.
if (length(endowment) == 0) {
  stop(
    "No table matched the XPath; the Wikipedia page layout may have changed.",
    call. = FALSE
  )
}

# View() opens a data viewer; only attempt that in an interactive session.
if (interactive()) {
  View(endowment)
}

endowment <- as.data.frame(endowment)
if (ncol(endowment) < 2) {
  stop("Expected at least two columns in the endowment table.", call. = FALSE)
}

# Select by position rather than by the header-derived (and therefore
# year-dependent) column name.
# NOTE(review): assumes column 1 is the institution and column 2 the 2015
# endowment in billions of USD -- confirm against the live table.
endowmentnew <- endowment[, 1:2]
colnames(endowmentnew) <- c("Institution", "Endowment in 2015 (US$B)")

# Drop footnote markers ("[12]..."), the dollar sign, thousands separators and
# surrounding whitespace so the amount can be parsed as a number.
endowment_text <- gsub("\\[.*", "", endowmentnew[[2]])
endowment_text <- gsub("[$,]", "", endowment_text)
endowmentnew[[2]] <- as.numeric(trimws(endowment_text))

# Strip footnote markers and parenthetical remarks from the names, then trim
# the whitespace that stripping leaves behind.
institution_text <- gsub("\\[.*", "", endowmentnew$Institution)
institution_text <- gsub("\\(.*", "", institution_text)
endowmentnew$Institution <- trimws(institution_text)

# Largest endowments first (order() puts NA values last). head() keeps at
# most 25 rows and, unlike `[1:25, ]`, does not pad a shorter table with NA
# rows.
endowmentnew <- endowmentnew[
  order(endowmentnew[[2]], decreasing = TRUE),
]
endowmentnew <- head(endowmentnew, 25)

if (interactive()) {
  View(endowmentnew)
}

# Refer to columns by bare name inside aes(); ggplot2 looks them up in the
# `data` argument, so `df$col` is unnecessary (and discouraged by ggplot2).
endplot <- ggplot(
  endowmentnew,
  aes(
    x = reorder(Institution, -`Endowment in 2015 (US$B)`),
    y = `Endowment in 2015 (US$B)`
  )
) +
  geom_bar(stat = "identity", fill = "#FF9999", colour = "black")

endplot +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ggtitle("US University Endowments") +
  labs(x = "University Name", y = "Endowment in 2015 (US$B)")
Here’s a plot of endowment per student. The universities that appear on this chart differ starkly from those on the aggregate chart above.
suppressMessages(library("rvest"))
suppressMessages(library("dplyr"))
suppressMessages(library("ggplot2"))
# Scrape the endowment-per-student table from Wikipedia and draw a bar chart
# of the 25 highest per-student endowments.
url <- "https://en.wikipedia.org/wiki/List_of_colleges_and_universities_in_the_United_States_by_endowment"

# html_table() returns a list with one table per node matched by the XPath.
endowmentperstudent <- url %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/table[2]') %>%
  html_table()

# Fail early with a readable message instead of an obscure subsetting error
# when the XPath no longer matches anything.
if (length(endowmentperstudent) == 0) {
  stop(
    "No table matched the XPath; the Wikipedia page layout may have changed.",
    call. = FALSE
  )
}

# View() opens a data viewer; only attempt that in an interactive session.
if (interactive()) {
  View(endowmentperstudent)
}

endowmentperstudent <- as.data.frame(endowmentperstudent)
if (ncol(endowmentperstudent) < 4) {
  stop(
    "Expected at least four columns in the per-student table.",
    call. = FALSE
  )
}

# Keep only the two columns the plot needs, selected by position rather than
# by the header-derived (and therefore year-dependent) column names.
# NOTE(review): assumes column 1 is the institution and column 4 the
# per-student endowment in millions of USD -- confirm against the live table.
endowmentperstudent <- endowmentperstudent[, c(1, 4)]
colnames(endowmentperstudent) <- c(
  "Institution",
  "Endowment per student in 2015 (US$M)"
)
endowmentperstudent$Institution <- as.factor(endowmentperstudent$Institution)

# Drop footnote markers ("[12]..."), the dollar sign, thousands separators and
# surrounding whitespace before parsing. Cells that still cannot be parsed
# become NA (with a coercion warning) and are sorted to the end below.
per_student_text <- gsub("\\[.*", "", endowmentperstudent[[2]])
per_student_text <- gsub("[$,]", "", per_student_text)
endowmentperstudent[[2]] <- as.numeric(trimws(per_student_text))

# Highest per-student endowment first (order() puts NA values last). head()
# keeps at most 25 rows and, unlike `[1:25, ]`, does not pad a shorter table
# with NA rows.
endowmentperstudent <- endowmentperstudent[
  order(endowmentperstudent[[2]], decreasing = TRUE),
]
endowmentperstudent <- head(endowmentperstudent, 25)

# Refer to columns by bare name inside aes(); ggplot2 looks them up in the
# `data` argument, so `df$col` is unnecessary (and discouraged by ggplot2).
endplotperstudent <- ggplot(
  endowmentperstudent,
  aes(
    x = reorder(Institution, -`Endowment per student in 2015 (US$M)`),
    y = `Endowment per student in 2015 (US$M)`
  )
) +
  geom_bar(stat = "identity", fill = "#040c42", colour = "black")

endplotperstudent +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ggtitle("US University Endowments per Student") +
  labs(x = "University Name", y = "Endowment per student in 2015 (US$M)")