Load Libraries

library(tidyverse)
library(ggmap)
library(rvest)
library(geosphere)

Register Google Maps API

# Register the Google Maps API key with ggmap; APIKEY must already hold the key.
register_google(key = APIKEY)

Load in Data Set with college info

# Read the college data; colleges_data_set must point at the CSV to load.
college <- read_csv(file = colleges_data_set)

# Preview the first rows.
college %>% head()
## # A tibble: 6 x 17
##       id name  city  state region highest_degree control gender admission_rate
##    <dbl> <chr> <chr> <chr> <chr>  <chr>          <chr>   <chr>           <dbl>
## 1 102669 Alas… Anch… AK    West   Graduate       Private CoEd            0.421
## 2 101648 Mari… Mari… AL    South  Associate      Public  CoEd            0.614
## 3 100830 Aubu… Mont… AL    South  Graduate       Public  CoEd            0.802
## 4 101879 Univ… Flor… AL    South  Graduate       Public  CoEd            0.679
## 5 100858 Aubu… Aubu… AL    South  Graduate       Public  CoEd            0.835
## 6 100663 Univ… Birm… AL    South  Graduate       Public  CoEd            0.857
## # … with 8 more variables: sat_avg <dbl>, undergrads <dbl>, tuition <dbl>,
## #   faculty_salary_avg <dbl>, loan_default_rate <chr>, median_debt <dbl>,
## #   lon <dbl>, lat <dbl>

Get just the top 100 Colleges by undergrad Population

# Keep the 100 rows with the largest undergraduate counts, then only the
# name, place and coordinate columns.
college <- college %>%
  slice_max(order_by = undergrads, n = 100) %>%
  select(name, state, city, undergrads, lon, lat)

head(college, n = 6)
## # A tibble: 6 x 6
##   name                               state city          undergrads    lon   lat
##   <chr>                              <chr> <chr>              <dbl>  <dbl> <dbl>
## 1 University of Central Florida      FL    Orlando            52280  -81.4  28.5
## 2 Liberty University                 VA    Lynchburg          49340  -79.1  37.4
## 3 Texas A & M University-College St… TX    College Stat…      46941  -96.3  30.6
## 4 Ohio State University-Main Campus  OH    Columbus           43733  -83.0  40.0
## 5 Pennsylvania State University-Mai… PA    University P…      39958  -77.9  40.8
## 6 Arizona State University-Tempe     AZ    Tempe              39316 -112.   33.4

Scrape the Air Force base names and save them to the bases variable

  # Source page and the CSS selector matching the link for each base.
  url <- "https://www.military.com/base-guide/browse-by-service/air-force"
  selector <- ".bullet-section-summary:nth-child(1) a"
  page <- read_html(url)

  # The text of each matched link is used as the base name.
  value <- html_text(html_nodes(page, selector))

# One row per scraped name.
bases <- tibble(base_name = value)

head(bases, n = 6)
## # A tibble: 6 x 1
##   base_name                      
##   <chr>                          
## 1 Maxwell-Gunter                 
## 2 Eielson                        
## 3 Elmendorf                      
## 4 Joint Base Elmendorf-Richardson
## 5 Davis-Monthan Air Force Base   
## 6 Luke Air Force Base

Add a column to the bases dataset with the geocoded location of each base.

# Geocode every base name; the lon/lat result is stored as the nested
# `location` column.
# NOTE(review): bare names look ambiguous -- in the output below "Elmendorf"
# resolves to (-98.3, 29.3), nowhere near "Joint Base Elmendorf-Richardson"
# at (-150, 61.3). Consider a more specific query string; confirm first.
bases <- mutate(bases, location = geocode(base_name))

head(bases, n = 6)
## # A tibble: 6 x 2
##   base_name                       location$lon  $lat
##   <chr>                                  <dbl> <dbl>
## 1 Maxwell-Gunter                         -86.2  32.4
## 2 Eielson                               -147.   64.6
## 3 Elmendorf                              -98.3  29.3
## 4 Joint Base Elmendorf-Richardson       -150.   61.3
## 5 Davis-Monthan Air Force Base          -111.   32.2
## 6 Luke Air Force Base                   -112.   33.5

Notice that the location column in bases is a nested column holding both lon and lat. We need to separate those out into separate columns.

# Rebuild bases with lon and lat lifted out of the nested `location` column,
# keeping only base_name, lon and lat (in that order).
bases <- tibble(
  base_name = bases$base_name,
  lon = bases$location$lon,
  lat = bases$location$lat
)

head(bases, n = 6)
## # A tibble: 6 x 3
##   base_name                          lon   lat
##   <chr>                            <dbl> <dbl>
## 1 Maxwell-Gunter                   -86.2  32.4
## 2 Eielson                         -147.   64.6
## 3 Elmendorf                        -98.3  29.3
## 4 Joint Base Elmendorf-Richardson -150.   61.3
## 5 Davis-Monthan Air Force Base    -111.   32.2
## 6 Luke Air Force Base             -112.   33.5

Merge the bases dataset with the college dataset, pairing every base with every school, and save the result to base_school_pairs

# Cross join: with by = NULL, merge() pairs every base with every college.
# Both inputs have lon/lat columns, so they come back suffixed
# .x (bases) and .y (college).
base_school_pairs <- merge(x = bases, y = college, by = NULL)

head(base_school_pairs, n = 6)
##                         base_name      lon.x    lat.x
## 1                  Maxwell-Gunter  -86.24027 32.41310
## 2                         Eielson -147.06382 64.64310
## 3                       Elmendorf  -98.33279 29.25607
## 4 Joint Base Elmendorf-Richardson -149.69335 61.25450
## 5    Davis-Monthan Air Force Base -110.85926 32.16755
## 6             Luke Air Force Base -112.37550 33.54149
##                            name state    city undergrads     lon.y    lat.y
## 1 University of Central Florida    FL Orlando      52280 -81.37924 28.53834
## 2 University of Central Florida    FL Orlando      52280 -81.37924 28.53834
## 3 University of Central Florida    FL Orlando      52280 -81.37924 28.53834
## 4 University of Central Florida    FL Orlando      52280 -81.37924 28.53834
## 5 University of Central Florida    FL Orlando      52280 -81.37924 28.53834
## 6 University of Central Florida    FL Orlando      52280 -81.37924 28.53834

Create a function that will calculate the distance (in miles) between the school locations and the Air Force bases

#' Distance in miles between paired start and finish coordinates
#'
#' @param start_lon,start_lat Longitude and latitude of the starting points,
#'   in decimal degrees.
#' @param finish_lon,finish_lat Longitude and latitude of the end points,
#'   in decimal degrees.
#' @return Numeric vector of distances in miles, one per coordinate pair.
point_distance <- function(start_lon, start_lat, finish_lon, finish_lat) {
  # distGeo() returns meters; this is the conversion factor the analysis uses.
  meters_per_mile <- 1609.34
  # distGeo() accepts two-column (lon, lat) matrices and works row by row,
  # so no Vectorize() wrapper (one call per row) is needed.
  start <- cbind(start_lon, start_lat)
  finish <- cbind(finish_lon, finish_lat)
  # Namespaced call replaces require(), which only returns FALSE when the
  # package is missing instead of raising an error.
  geosphere::distGeo(start, finish) / meters_per_mile
}
# Add the distance for every base/school pair; the .x coordinates come from
# bases and the .y coordinates from college.
base_school_pairs <- mutate(
  base_school_pairs,
  distance = point_distance(lon.x, lat.x, lon.y, lat.y)
)

head(base_school_pairs, n = 6)
##                         base_name      lon.x    lat.x
## 1                  Maxwell-Gunter  -86.24027 32.41310
## 2                         Eielson -147.06382 64.64310
## 3                       Elmendorf  -98.33279 29.25607
## 4 Joint Base Elmendorf-Richardson -149.69335 61.25450
## 5    Davis-Monthan Air Force Base -110.85926 32.16755
## 6             Luke Air Force Base -112.37550 33.54149
##                            name state    city undergrads     lon.y    lat.y
## 1 University of Central Florida    FL Orlando      52280 -81.37924 28.53834
## 2 University of Central Florida    FL Orlando      52280 -81.37924 28.53834
## 3 University of Central Florida    FL Orlando      52280 -81.37924 28.53834
## 4 University of Central Florida    FL Orlando      52280 -81.37924 28.53834
## 5 University of Central Florida    FL Orlando      52280 -81.37924 28.53834
## 6 University of Central Florida    FL Orlando      52280 -81.37924 28.53834
##    distance
## 1  394.0661
## 2 3741.8889
## 3 1027.7772
## 4 3799.1088
## 5 1773.2090
## 6 1863.5051

Select only the columns that we need

# Keep the school name (renamed from `name`), the base name and the distance.
base_school_pairs <- select(
  base_school_pairs,
  school_name = name, base_name, distance
)

head(base_school_pairs, n = 6)
##                     school_name                       base_name  distance
## 1 University of Central Florida                  Maxwell-Gunter  394.0661
## 2 University of Central Florida                         Eielson 3741.8889
## 3 University of Central Florida                       Elmendorf 1027.7772
## 4 University of Central Florida Joint Base Elmendorf-Richardson 3799.1088
## 5 University of Central Florida    Davis-Monthan Air Force Base 1773.2090
## 6 University of Central Florida             Luke Air Force Base 1863.5051

As we can see from the above, this returns the distance from every Air Force base to every school. We now need to keep only the shortest distance for each school; for this we can use slice_min.

# Closest base per school. The result is not assigned, only printed.
# slice_min() keeps ties, so a school can get more than one row
# (101 rows for 100 schools in the output below).
base_school_pairs %>%
  group_by(school_name) %>%
  slice_min(order_by = distance, n = 1)
## # A tibble: 101 x 3
## # Groups:   school_name [100]
##    school_name                                   base_name              distance
##    <chr>                                         <chr>                     <dbl>
##  1 Arizona State University-Tempe                Luke Air Force Base        26.4
##  2 Auburn University                             Maxwell-Gunter             46.4
##  3 Brigham Young University-Idaho                Hill Air Force Base       188. 
##  4 Brigham Young University-Provo                Hill Air Force Base        62.8
##  5 California Polytechnic State University-San … Vandenberg Air Force …     37.6
##  6 California State Polytechnic University-Pomo… March Air Reserve Base     30.2
##  7 California State University-Fresno            Vandenberg Air Force …    145. 
##  8 California State University-Fullerton         Los Angeles Air Force…     26.5
##  9 California State University-Long Beach        Los Angeles Air Force…     14.9
## 10 California State University-Los Angeles       Los Angeles Air Force…     12.2
## # … with 91 more rows
# base_school_pairs itself is unchanged: the pipeline above was only printed.
head(base_school_pairs, n = 6)
##                     school_name                       base_name  distance
## 1 University of Central Florida                  Maxwell-Gunter  394.0661
## 2 University of Central Florida                         Eielson 3741.8889
## 3 University of Central Florida                       Elmendorf 1027.7772
## 4 University of Central Florida Joint Base Elmendorf-Richardson 3799.1088
## 5 University of Central Florida    Davis-Monthan Air Force Base 1773.2090
## 6 University of Central Florida             Luke Air Force Base 1863.5051
# which.min() gives the position of the first minimum only, so each school
# keeps exactly one row even when distances tie.
slice(group_by(base_school_pairs, school_name), which.min(distance))
## # A tibble: 100 x 3
## # Groups:   school_name [100]
##    school_name                                   base_name              distance
##    <chr>                                         <chr>                     <dbl>
##  1 Arizona State University-Tempe                Luke Air Force Base        26.4
##  2 Auburn University                             Maxwell-Gunter             46.4
##  3 Brigham Young University-Idaho                Hill Air Force Base       188. 
##  4 Brigham Young University-Provo                Hill Air Force Base        62.8
##  5 California Polytechnic State University-San … Vandenberg Air Force …     37.6
##  6 California State Polytechnic University-Pomo… March Air Reserve Base     30.2
##  7 California State University-Fresno            Vandenberg Air Force …    145. 
##  8 California State University-Fullerton         Los Angeles Air Force…     26.5
##  9 California State University-Long Beach        Los Angeles Air Force…     14.9
## 10 California State University-Los Angeles       Los Angeles Air Force…     12.2
## # … with 90 more rows
# Still the full pair table: the result above was printed, not assigned.
head(base_school_pairs, n = 6)
##                     school_name                       base_name  distance
## 1 University of Central Florida                  Maxwell-Gunter  394.0661
## 2 University of Central Florida                         Eielson 3741.8889
## 3 University of Central Florida                       Elmendorf 1027.7772
## 4 University of Central Florida Joint Base Elmendorf-Richardson 3799.1088
## 5 University of Central Florida    Davis-Monthan Air Force Base 1773.2090
## 6 University of Central Florida             Luke Air Force Base 1863.5051