Lab 04 - La Quinta is Spanish for next to Denny’s, Pt. 2

Insert your name here Insert date here

Load packages and data

library(tidyverse) 
library(dsbox) 
# Read the state lookup table; show_col_types = FALSE suppresses the
# column-specification message that read_csv() would otherwise print.
states <- read_csv(file = "states.csv", show_col_types = FALSE)

Exercise 1

# Keep only the Denny's rows whose state is Alaska, then count them.
dn_ak <- filter(dennys, state == "AK")

nrow(dn_ak)
## [1] 3

Exercise 2

# Keep only the La Quinta rows whose state is Alaska, then count them.
lq_ak <- filter(laquinta, state == "AK")
nrow(lq_ak)
## [1] 2

Exercise 3

# Number of Denny's / La Quinta pairings in Alaska: every row of dn_ak paired
# with every row of lq_ak, i.e. the product of the two row counts.
nrow(dn_ak) * nrow(lq_ak)
## [1] 6

Exercise 4

# Pair every Alaska Denny's with every Alaska La Quinta. Both tables were
# filtered to state == "AK", so joining on `state` matches each row of one
# table with all rows of the other. That fan-out is intended here, so the
# relationship is declared explicitly, which also silences dplyr's
# "unexpected many-to-many relationship" warning.
dn_lq_ak <- full_join(
  dn_ak,
  lq_ak,
  by = "state",
  relationship = "many-to-many"
)
dn_lq_ak
## # A tibble: 6 × 11
##   address.x     city.x state zip.x longitude.x latitude.x address.y city.y zip.y
##   <chr>         <chr>  <chr> <chr>       <dbl>      <dbl> <chr>     <chr>  <chr>
## 1 2900 Denali   Ancho… AK    99503       -150.       61.2 3501 Min… "\nAn… 99503
## 2 2900 Denali   Ancho… AK    99503       -150.       61.2 4920 Dal… "\nFa… 99709
## 3 3850 Debarr … Ancho… AK    99508       -150.       61.2 3501 Min… "\nAn… 99503
## 4 3850 Debarr … Ancho… AK    99508       -150.       61.2 4920 Dal… "\nFa… 99709
## 5 1929 Airport… Fairb… AK    99701       -148.       64.8 3501 Min… "\nAn… 99503
## 6 1929 Airport… Fairb… AK    99701       -148.       64.8 4920 Dal… "\nFa… 99709
## # ℹ 2 more variables: longitude.y <dbl>, latitude.y <dbl>

Exercise 5

# Great-circle (haversine) distance between two points on the Earth.
#
# Arguments:
#   long1, lat1  longitude and latitude of the first point, in degrees
#   long2, lat2  longitude and latitude of the second point, in degrees
#   round        number of decimal places to keep in the result (default 3)
#
# Returns the distance in kilometres. All coordinate arguments are used in
# vectorised arithmetic, so numeric vectors (e.g. data-frame columns) work.
haversine <- function(long1, lat1, long2, lat2, round = 3) {
  # convert to radians
  deg_to_rad <- pi / 180
  long1 <- long1 * deg_to_rad
  lat1 <- lat1 * deg_to_rad
  long2 <- long2 * deg_to_rad
  lat2 <- lat2 * deg_to_rad

  R <- 6371 # Earth mean radius in km

  a <- sin((lat2 - lat1) / 2)^2 +
    cos(lat1) * cos(lat2) * sin((long2 - long1) / 2)^2

  # Floating-point rounding can push sqrt(a) marginally above 1 for (nearly)
  # antipodal points, which would make asin() return NaN; cap it at 1.
  d <- R * 2 * asin(pmin(1, sqrt(a)))

  # The `round` argument holds the digit count; base::round() is spelled out
  # so the call cannot be confused with that argument.
  base::round(d, digits = round) # distance in km
}

Exercise 6

# Load the haversine function (run this)
#
# Great-circle (haversine) distance between two points on the Earth.
#
# Arguments:
#   long1, lat1  longitude and latitude of the first point, in degrees
#   long2, lat2  longitude and latitude of the second point, in degrees
#   round        number of decimal places to keep in the result (default 3)
#
# Returns the distance in kilometres. All coordinate arguments are used in
# vectorised arithmetic, so numeric vectors (e.g. data-frame columns) work.
haversine <- function(long1, lat1, long2, lat2, round = 3) {
  # convert degrees to radians
  deg_to_rad <- pi / 180
  long1 <- long1 * deg_to_rad
  lat1 <- lat1 * deg_to_rad
  long2 <- long2 * deg_to_rad
  lat2 <- lat2 * deg_to_rad

  R <- 6371 # Earth mean radius in km

  a <- sin((lat2 - lat1) / 2)^2 +
    cos(lat1) * cos(lat2) * sin((long2 - long1) / 2)^2

  # Floating-point rounding can push sqrt(a) marginally above 1 for (nearly)
  # antipodal points, which would make asin() return NaN; cap it at 1.
  d <- R * 2 * asin(pmin(1, sqrt(a)))

  # The `round` argument holds the digit count; base::round() is spelled out
  # so the call cannot be confused with that argument.
  base::round(d, digits = round)
}
# Add a `distance` column: the haversine distance between the Denny's
# coordinates (.x columns) and the La Quinta coordinates (.y columns) of
# each pairing.
dn_lq_ak <- mutate(
  dn_lq_ak,
  distance = haversine(longitude.x, latitude.x, longitude.y, latitude.y)
)

Using mutate

Exercise 7

# For each Denny's address, keep the smallest distance among its pairings,
# i.e. the distance to its closest La Quinta.
dn_lq_ak_mindist <- summarise(
  group_by(dn_lq_ak, address.x),
  closest = min(distance)
)

Exercise 8

# North Carolina locations of each chain.
dn_nc <- filter(dennys, state == "NC")
lq_nc <- filter(laquinta, state == "NC")

# Texas locations of each chain.
dn_tx <- filter(dennys, state == "TX")
lq_tx <- filter(laquinta, state == "TX")
# Plot each Alaska Denny's together with its nearest La Quinta.
#
# The nearest pairing is selected per Denny's (grouped by address.x) rather
# than by matching against the pooled set of minimum distances, so a pairing
# is kept only when it is the minimum for that particular Denny's.
#
# The strings inside aes() are legend categories, not colour names, so they
# carry the chain names that the "Establishment" legend should display.
dn_lq_ak %>%
  group_by(address.x) %>%
  filter(distance == min(distance)) %>%
  ungroup() %>%
  ggplot() +
  geom_point(mapping = aes(
    x = longitude.x,
    y = latitude.x,
    color = "Denny's"
  )) +
  geom_point(mapping = aes(
    x = longitude.y,
    y = latitude.y,
    color = "La Quinta"
  )) +
  labs(
    title = "Denny's and La Quinta Locations",
    subtitle = "in Alaska",
    x = "Longitude of establishments",
    y = "Latitude of establishments",
    color = "Establishment"
  )

The distances in Alaska are quite large. Most Denny’s are over 100 km away from the nearest La Quinta.

Exercise 9

# Scatter plot of North Carolina locations, one point layer per chain.
# Both layers share the longitude/latitude position mapping declared once on
# the plot; each layer supplies its own data and its legend label.
ggplot(mapping = aes(x = longitude, y = latitude)) +
  geom_point(
    mapping = aes(color = "Denny's"),
    data = dn_nc,
    size = 3,
    alpha = 0.9
  ) +
  geom_point(
    mapping = aes(color = "La Quinta"),
    data = lq_nc,
    size = 3,
    alpha = 0.9
  ) +
  labs(
    title = "Denny's and La Quinta Locations in North Carolina",
    x = "Longitude",
    y = "Latitude",
    color = "Establishment"
  ) +
  theme_minimal()

Exercise 10

# Scatter plot of Texas locations, one point layer per chain.
# Both layers share the longitude/latitude position mapping declared once on
# the plot; each layer supplies its own data and its legend label.
ggplot(mapping = aes(x = longitude, y = latitude)) +
  geom_point(
    mapping = aes(color = "Denny's"),
    data = dn_tx,
    size = 3,
    alpha = 0.9
  ) +
  geom_point(
    mapping = aes(color = "La Quinta"),
    data = lq_tx,
    size = 3,
    alpha = 0.9
  ) +
  labs(
    title = "Denny's and La Quinta Locations in Texas",
    x = "Longitude",
    y = "Latitude",
    color = "Establishment"
  ) +
  theme_minimal()

Exercise 11

# California locations of each chain.
dn_ca <- dennys %>% filter(state == "CA")
lq_ca <- laquinta %>% filter(state == "CA")

# Pair every California Denny's with every California La Quinta and compute
# the haversine distance for each pairing. Joining on `state` after filtering
# both tables to "CA" is a deliberate many-to-many match, so the relationship
# is declared explicitly; this also silences dplyr's many-to-many warning.
dn_lq_ca <- full_join(
  dn_ca,
  lq_ca,
  by = "state",
  relationship = "many-to-many"
) %>%
  mutate(distance = haversine(longitude.x, latitude.x,
                              longitude.y, latitude.y))

# Distance from each Denny's address to its closest La Quinta.
dn_lq_ca_mindist <- dn_lq_ca %>%
  group_by(address.x) %>%
  summarise(closest = min(distance))

# Scatter plot of California locations, one point layer per chain.
# Both layers share the longitude/latitude position mapping declared once on
# the plot; each layer supplies its own data and its legend label.
ggplot(mapping = aes(x = longitude, y = latitude)) +
  geom_point(
    mapping = aes(color = "Denny's"),
    data = dn_ca,
    size = 2.5,
    alpha = 0.9
  ) +
  geom_point(
    mapping = aes(color = "La Quinta"),
    data = lq_ca,
    size = 2.5,
    alpha = 0.9
  ) +
  labs(
    title = "Denny's and La Quinta Locations in California",
    x = "Longitude",
    y = "Latitude",
    color = "Establishment"
  ) +
  theme_minimal()

Exercise 12

Texas has the smallest distances overall, which means Denny’s and La Quinta are frequently right next to each other. North Carolina is decent, but Texas shows the strongest clustering. Alaska has very large distances.

Add exercise headings as needed.