Jack Sommer

Assignment 6

Data Visualization

Libraries:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages ---------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v stringr 1.4.0
## v tidyr   1.1.2     v forcats 0.5.0
## v readr   1.3.1
## -- Conflicts ------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(ggplot2)
library(ggrepel)
library(leaflet)

PART 1: USING GGPLOT2 LIBRARY

# State outlines from the maps package, narrowed to Virginia's polygon(s).
us_map <- map_data("state")
virginia <- us_map %>%
  filter(region == "virginia")

# Raw inputs: one table of U.S. cities and one of colleges.
cities <- read.csv("uscities.csv")
colleges <- read.csv("college.csv")

# Virginia cities; `state_id` is renamed to `state` so the column name
# matches the one used in the college table.
cities_virginia <- cities %>%
  filter(state_id == "VA") %>%
  rename(state = state_id)

# Virginia colleges; one record lists the school's name in the `city`
# column, so it is rewritten to the actual city.
colleges_virginia <- colleges %>%
  filter(state == "VA") %>%
  mutate(city = replace(city, city == "University of Richmond", "Richmond"))

# Only cities above 100,000 residents are kept for labelling on the map.
filtered_cities_virginia <- cities_virginia %>%
  filter(population > 100000)

str(colleges_virginia)
## 'data.frame':    39 obs. of  17 variables:
##  $ id                : int  231554 231624 234076 233374 232186 232423 233897 233277 234207 232043 ...
##  $ name              : chr  "Bluefield College" "College of William and Mary" "University of Virginia-Main Campus" "University of Richmond" ...
##  $ city              : chr  "Bluefield" "Williamsburg" "Charlottesville" "Richmond" ...
##  $ state             : chr  "VA" "VA" "VA" "VA" ...
##  $ region            : chr  "South" "South" "South" "South" ...
##  $ highest_degree    : chr  "Bachelor" "Graduate" "Graduate" "Graduate" ...
##  $ control           : chr  "Private" "Public" "Public" "Private" ...
##  $ gender            : chr  "CoEd" "CoEd" "CoEd" "CoEd" ...
##  $ admission_rate    : num  0.98 0.33 0.29 0.318 0.666 ...
##  $ sat_avg           : int  907 1373 1357 1337 1152 1153 946 968 1395 1003 ...
##  $ undergrads        : int  897 6256 15515 3223 21678 18697 1477 8843 1880 1198 ...
##  $ tuition           : int  22840 17656 13208 46680 10382 9662 8868 9360 45617 30800 ...
##  $ faculty_salary_avg: int  4949 10749 12867 11413 10466 7993 6844 7820 12264 5973 ...
##  $ loan_default_rate : chr  "0.077" "0.006" "0.017" "0.015" ...
##  $ median_debt       : num  18873 19500 19633 19935 20000 ...
##  $ lng               : num  -81.3 -76.7 -78.5 -77.5 -77.3 ...
##  $ lat               : num  37.3 37.3 38 37.6 38.8 ...
# Treat the institution type as a categorical variable for the colour legend.
colleges_virginia$control <- as.factor(colleges_virginia$control)

# Static map of Virginia: state outline, one point per college (sized by
# tuition, coloured by institution type) and repelled labels for the cities
# kept in `filtered_cities_virginia`.
ggplot(data = virginia, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "snow2", color = "black") +
  # `group = NULL` drops the polygon grouping inherited from the base plot,
  # because the college and city tables have no `group` column.
  geom_point(
    data = colleges_virginia,
    mapping = aes(x = lng, y = lat, group = NULL,
                  size = tuition, color = control),
    alpha = 0.65
  ) +
  geom_text_repel(
    data = filtered_cities_virginia,
    mapping = aes(x = lng, y = lat, label = city, group = NULL)
  ) +
  # Without a map coordinate system the longitude/latitude axes stretch to
  # whatever aspect ratio the plotting device has, distorting the state's
  # shape. coord_quickmap() fixes the aspect ratio to approximate a map
  # projection and needs no extra package.
  coord_quickmap() +
  ggtitle("Colleges in Virginia by Type & Tuition Amount",
          subtitle = "Source: U.S. Department of Education") +
  guides(colour = guide_legend("Type of Institution"),
         size = guide_legend("Tuition Amount")) +
  theme_void() +
  theme(legend.position = "right",
        plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5))

PART 2: USING LEAFLET LIBRARY

# Re-read the raw inputs so Part 2 starts from unmodified data.
cities <- read.csv("uscities.csv", header = TRUE, sep = ",")
colleges <- read.csv("college.csv", header = TRUE, sep = ",")

# Virginia cities; `state_id` is renamed to `state` to match the college
# table (the join below therefore yields `state.x` / `state.y`).
cities_virginia <- cities %>%
  filter(state_id == "VA") %>%
  rename(state = state_id)

# Virginia colleges; one record lists the school's name in the `city`
# column, so it is rewritten to the actual city.
colleges_virginia <- colleges %>%
  filter(state == "VA") %>%
  mutate(city = replace(city, city == "University of Richmond", "Richmond"))

# The join key is the bare city name, which the city table does not
# guarantee to be unique. A repeated name would duplicate every college in
# that city (and stack identical markers on the map), so keep a single row
# per name -- the most populous one -- before joining. When names are
# already unique this leaves the join result unchanged.
city_lookup <- cities_virginia %>%
  arrange(desc(population)) %>%
  distinct(city, .keep_all = TRUE)

# Attach city-level columns to each college, then keep only the columns of
# interest. The `.x` suffix marks columns that came from the college table.
joined_data <- colleges_virginia %>%
  left_join(city_lookup, by = "city") %>%
  select(city, state.x, lng.x, lat.x,
         population, name, control, admission_rate, sat_avg,
         undergrads, tuition, faculty_salary_avg)
# Colour lookup for the institution type: one colour per level of `control`.
pal <- colorFactor(palette = c("blue", "red"),
                   domain = c("Private", "Public"))

# Interactive map: one circle per college, sized by tuition and coloured by
# institution type, with a popup showing the college, its city and tuition.
leaflet(data = joined_data) %>%
  addProviderTiles(provider = providers$Esri.DeLorme) %>%
  addCircleMarkers(
    lng = ~lng.x,
    lat = ~lat.x,
    # Scale tuition down so the value is usable as a marker radius.
    radius = ~tuition / 2000,
    color = ~pal(control),
    stroke = TRUE,
    weight = 2,
    opacity = 0.5,
    fillOpacity = 0.25,
    popup = ~paste("College:", name, "<br/>", "City:", city,
                   "<br/>", "Tuition:", tuition)
  ) %>%
  addLegend(
    position = "bottomright",
    pal = pal,
    values = ~control,
    title = "Type of Institution",
    opacity = 1
  ) %>%
  # Title and source are added as plain controls in the top-right corner.
  addControl(html = "Colleges In Virginia by Type and Tuition Amount",
             position = "topright") %>%
  addControl(html = "Source: U.S. Department of Education",
             position = "topright")

Analysis:

Using the Esri.DeLorme provider tiles was very informative, because the base map clearly shows the highways running through Virginia. As you can see from the map, almost every college or university within the state is directly adjacent to a major highway. The one notable exception is Ferrum College, but when you zoom in on the map, Ferrum is still on a major local road in the area. For younger institutions, this tells me that they deliberately chose to situate their schools along highways, as it would entice more students to attend. For older institutions built before the age of highways, their location along highways points to the fact that the institution was established first, and the town or city it is situated in was built up around it and continued to grow, up to the point where it made sense to add a highway through the major population center.