Load Libraries
library(tidyverse)
library(ggmap)
library(rvest)
library(geosphere)
Register Google Maps API
# Register the Google API key with ggmap; the geocode() call further down
# relies on it. APIKEY must already be defined (its definition is not shown).
register_google(key = APIKEY)
Load in Data Set with college info
# Read the colleges CSV into a tibble and preview the first rows.
college <- read_csv(file = colleges_data_set)
head(college)
## # A tibble: 6 x 17
## id name city state region highest_degree control gender admission_rate
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 102669 Alas… Anch… AK West Graduate Private CoEd 0.421
## 2 101648 Mari… Mari… AL South Associate Public CoEd 0.614
## 3 100830 Aubu… Mont… AL South Graduate Public CoEd 0.802
## 4 101879 Univ… Flor… AL South Graduate Public CoEd 0.679
## 5 100858 Aubu… Aubu… AL South Graduate Public CoEd 0.835
## 6 100663 Univ… Birm… AL South Graduate Public CoEd 0.857
## # … with 8 more variables: sat_avg <dbl>, undergrads <dbl>, tuition <dbl>,
## # faculty_salary_avg <dbl>, loan_default_rate <chr>, median_debt <dbl>,
## # lon <dbl>, lat <dbl>
Keep only the top 100 colleges by undergraduate population
# Keep the rows with the 100 largest undergraduate enrollments and only the
# columns needed later. slice_max() keeps ties by default, so more than 100
# rows can come back if schools tie at the cut-off.
college <- college %>%
  slice_max(order_by = undergrads, n = 100) %>%
  select(name, state, city, undergrads, lon, lat)
head(college)
## # A tibble: 6 x 6
## name state city undergrads lon lat
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 University of Central Florida FL Orlando 52280 -81.4 28.5
## 2 Liberty University VA Lynchburg 49340 -79.1 37.4
## 3 Texas A & M University-College St… TX College Stat… 46941 -96.3 30.6
## 4 Ohio State University-Main Campus OH Columbus 43733 -83.0 40.0
## 5 Pennsylvania State University-Mai… PA University P… 39958 -77.9 40.8
## 6 Arizona State University-Tempe AZ Tempe 39316 -112. 33.4
Scrape the Air Force base names and save them to the bases variable
# Download the military.com Air Force base guide and collect the text of every
# link matched by the CSS selector; each link text becomes one base_name row.
url <- "https://www.military.com/base-guide/browse-by-service/air-force"
selector <- ".bullet-section-summary:nth-child(1) a"
page <- read_html(url)
value <- html_text(html_nodes(page, selector))
bases <- tibble(base_name = value)
head(bases)
## # A tibble: 6 x 1
## base_name
## <chr>
## 1 Maxwell-Gunter
## 2 Eielson
## 3 Elmendorf
## 4 Joint Base Elmendorf-Richardson
## 5 Davis-Monthan Air Force Base
## 6 Luke Air Force Base
Add a column to the bases dataset with the geocoded location of each base.
# Geocode every base name. geocode() returns a data frame with lon and lat, so
# `location` is stored as a nested data-frame column (printed as location$lon
# and $lat).
bases <- mutate(bases, location = geocode(base_name))
head(bases)
## # A tibble: 6 x 2
## base_name location$lon $lat
## <chr> <dbl> <dbl>
## 1 Maxwell-Gunter -86.2 32.4
## 2 Eielson -147. 64.6
## 3 Elmendorf -98.3 29.3
## 4 Joint Base Elmendorf-Richardson -150. 61.3
## 5 Davis-Monthan Air Force Base -111. 32.2
## 6 Luke Air Force Base -112. 33.5
Notice that the location column in bases is a nested data frame holding lon and lat. We need to separate those out into their own columns.
# Lift lon and lat out of the nested `location` column into top-level columns,
# then drop `location` by selecting only the columns we keep.
bases <- bases %>%
  mutate(
    lon = location$lon,
    lat = location$lat
  ) %>%
  select(base_name, lon, lat)
head(bases)
## # A tibble: 6 x 3
## base_name lon lat
## <chr> <dbl> <dbl>
## 1 Maxwell-Gunter -86.2 32.4
## 2 Eielson -147. 64.6
## 3 Elmendorf -98.3 29.3
## 4 Joint Base Elmendorf-Richardson -150. 61.3
## 5 Davis-Monthan Air Force Base -111. 32.2
## 6 Luke Air Force Base -112. 33.5
Cross join the bases dataset with the college dataset (every base paired with every school) and save the result to base_school_pairs
# by = NULL makes merge() return the Cartesian product: one row per
# (base, school) pair. Both inputs have lon/lat columns, so they come back
# suffixed: lon.x/lat.x from bases and lon.y/lat.y from college.
base_school_pairs <- merge(x = bases, y = college, by = NULL)
head(base_school_pairs)
## base_name lon.x lat.x
## 1 Maxwell-Gunter -86.24027 32.41310
## 2 Eielson -147.06382 64.64310
## 3 Elmendorf -98.33279 29.25607
## 4 Joint Base Elmendorf-Richardson -149.69335 61.25450
## 5 Davis-Monthan Air Force Base -110.85926 32.16755
## 6 Luke Air Force Base -112.37550 33.54149
## name state city undergrads lon.y lat.y
## 1 University of Central Florida FL Orlando 52280 -81.37924 28.53834
## 2 University of Central Florida FL Orlando 52280 -81.37924 28.53834
## 3 University of Central Florida FL Orlando 52280 -81.37924 28.53834
## 4 University of Central Florida FL Orlando 52280 -81.37924 28.53834
## 5 University of Central Florida FL Orlando 52280 -81.37924 28.53834
## 6 University of Central Florida FL Orlando 52280 -81.37924 28.53834
Create a function that calculates the distance in miles between the school locations and the Air Force bases
# Distance in miles between paired start and finish coordinates.
#
# Arguments:
#   start_lon, start_lat   - longitude / latitude of the start point(s), degrees.
#   finish_lon, finish_lat - longitude / latitude of the finish point(s), degrees.
# Returns:
#   A numeric vector of distances in miles, one per coordinate pair.
#
# geosphere::distGeo() accepts two-column (lon, lat) matrices and computes the
# distance row by row, so the coordinates are bound into matrices and passed in
# a single call instead of wrapping a scalar function in Vectorize(). The
# namespace-qualified call replaces require() inside the function body.
point_distance <- function(start_lon, start_lat, finish_lon, finish_lat) {
  # distGeo() returns meters; same conversion factor as before.
  meters_per_mile <- 1609.34
  start <- cbind(start_lon, start_lat)
  finish <- cbind(finish_lon, finish_lat)
  geosphere::distGeo(start, finish) / meters_per_mile
}
# Add the base-to-school distance for every pair: .x columns are the base
# coordinates and .y columns are the school coordinates.
base_school_pairs <- mutate(
  base_school_pairs,
  distance = point_distance(lon.x, lat.x, lon.y, lat.y)
)
head(base_school_pairs)
## base_name lon.x lat.x
## 1 Maxwell-Gunter -86.24027 32.41310
## 2 Eielson -147.06382 64.64310
## 3 Elmendorf -98.33279 29.25607
## 4 Joint Base Elmendorf-Richardson -149.69335 61.25450
## 5 Davis-Monthan Air Force Base -110.85926 32.16755
## 6 Luke Air Force Base -112.37550 33.54149
## name state city undergrads lon.y lat.y
## 1 University of Central Florida FL Orlando 52280 -81.37924 28.53834
## 2 University of Central Florida FL Orlando 52280 -81.37924 28.53834
## 3 University of Central Florida FL Orlando 52280 -81.37924 28.53834
## 4 University of Central Florida FL Orlando 52280 -81.37924 28.53834
## 5 University of Central Florida FL Orlando 52280 -81.37924 28.53834
## 6 University of Central Florida FL Orlando 52280 -81.37924 28.53834
## distance
## 1 394.0661
## 2 3741.8889
## 3 1027.7772
## 4 3799.1088
## 5 1773.2090
## 6 1863.5051
Select only the columns that we need
# Keep only the school name (renamed from `name`), the base name, and the
# distance between them.
base_school_pairs <- select(
  base_school_pairs,
  school_name = name,
  base_name,
  distance
)
head(base_school_pairs)
## school_name base_name distance
## 1 University of Central Florida Maxwell-Gunter 394.0661
## 2 University of Central Florida Eielson 3741.8889
## 3 University of Central Florida Elmendorf 1027.7772
## 4 University of Central Florida Joint Base Elmendorf-Richardson 3799.1088
## 5 University of Central Florida Davis-Monthan Air Force Base 1773.2090
## 6 University of Central Florida Luke Air Force Base 1863.5051
As we can see from the above, this returns the distance from every Air Force base to each school. We now need to keep only the shortest distance for each school. For this we can use slice_min.
# For each school, print the row(s) with the smallest distance. slice_min()
# keeps ties by default, so a school can appear more than once. The result is
# only printed; base_school_pairs itself is not modified.
base_school_pairs %>%
  group_by(school_name) %>%
  slice_min(order_by = distance, n = 1)
## # A tibble: 101 x 3
## # Groups: school_name [100]
## school_name base_name distance
## <chr> <chr> <dbl>
## 1 Arizona State University-Tempe Luke Air Force Base 26.4
## 2 Auburn University Maxwell-Gunter 46.4
## 3 Brigham Young University-Idaho Hill Air Force Base 188.
## 4 Brigham Young University-Provo Hill Air Force Base 62.8
## 5 California Polytechnic State University-San … Vandenberg Air Force … 37.6
## 6 California State Polytechnic University-Pomo… March Air Reserve Base 30.2
## 7 California State University-Fresno Vandenberg Air Force … 145.
## 8 California State University-Fullerton Los Angeles Air Force… 26.5
## 9 California State University-Long Beach Los Angeles Air Force… 14.9
## 10 California State University-Los Angeles Los Angeles Air Force… 12.2
## # … with 91 more rows
# Preview the first six rows of base_school_pairs.
head(base_school_pairs, n = 6L)
## school_name base_name distance
## 1 University of Central Florida Maxwell-Gunter 394.0661
## 2 University of Central Florida Eielson 3741.8889
## 3 University of Central Florida Elmendorf 1027.7772
## 4 University of Central Florida Joint Base Elmendorf-Richardson 3799.1088
## 5 University of Central Florida Davis-Monthan Air Force Base 1773.2090
## 6 University of Central Florida Luke Air Force Base 1863.5051
# For each school, print exactly one row: which.min() returns the position of
# the first minimum distance, so ties do not produce extra rows. The result is
# only printed; base_school_pairs itself is not modified.
slice(
  group_by(base_school_pairs, school_name),
  which.min(distance)
)
## # A tibble: 100 x 3
## # Groups: school_name [100]
## school_name base_name distance
## <chr> <chr> <dbl>
## 1 Arizona State University-Tempe Luke Air Force Base 26.4
## 2 Auburn University Maxwell-Gunter 46.4
## 3 Brigham Young University-Idaho Hill Air Force Base 188.
## 4 Brigham Young University-Provo Hill Air Force Base 62.8
## 5 California Polytechnic State University-San … Vandenberg Air Force … 37.6
## 6 California State Polytechnic University-Pomo… March Air Reserve Base 30.2
## 7 California State University-Fresno Vandenberg Air Force … 145.
## 8 California State University-Fullerton Los Angeles Air Force… 26.5
## 9 California State University-Long Beach Los Angeles Air Force… 14.9
## 10 California State University-Los Angeles Los Angeles Air Force… 12.2
## # … with 90 more rows
# Preview the first six rows of base_school_pairs.
head(base_school_pairs, n = 6L)
## school_name base_name distance
## 1 University of Central Florida Maxwell-Gunter 394.0661
## 2 University of Central Florida Eielson 3741.8889
## 3 University of Central Florida Elmendorf 1027.7772
## 4 University of Central Florida Joint Base Elmendorf-Richardson 3799.1088
## 5 University of Central Florida Davis-Monthan Air Force Base 1773.2090
## 6 University of Central Florida Luke Air Force Base 1863.5051