Data from Thinknum are used to find potential locations for new Chipotle restaurants. Thinknum tracks thousands of websites, capturing and indexing vast amounts of public data.

LOADING AND HAVING A LOOK AT THE DATA

#install.packages("dplyr")
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages ------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.4
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'readr' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'stringr' was built under R version 3.5.3
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts ---------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.5.3
library(leaflet.extras)
## Warning: package 'leaflet.extras' was built under R version 3.5.3
library(sf)
## Warning: package 'sf' was built under R version 3.5.3
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
# Read the Chipotle store locations into a tibble named chipotle.
# The path is relative to the project directory, like the relative
# `datasets/` paths used later in this file, so the script does not
# depend on one user's home directory.
chipotle <- read_csv("datasets/chipotle.csv")
## Parsed with column specification:
## cols(
##   id = col_double(),
##   street = col_character(),
##   city = col_character(),
##   st = col_character(),
##   ctry = col_character(),
##   lat = col_double(),
##   lon = col_double(),
##   closed = col_logical()
## )
# Show the first six rows of chipotle to check the parsed columns
print(head(chipotle, n = 6L))
## # A tibble: 6 x 8
##        id street             city      st    ctry         lat    lon closed
##     <dbl> <chr>              <chr>     <chr> <chr>      <dbl>  <dbl> <lgl> 
## 1 1358023 121 N. La Cienega~ Los Ange~ <NA>  United St~  34.1 -118.  TRUE  
## 2 1358955 24369 Cedar Rd,    Lyndhurst OH    United St~  41.5  -81.5 TRUE  
## 3 1359012 1130 West Grove A~ Mesa      <NA>  United St~  33.4 -112.  TRUE  
## 4 1359490 6316 Delmar,       St. Louis MO    United St~  38.7  -90.3 TRUE  
## 5 1359574 1464 St. Louis Ga~ St. Louis MO    United St~  38.6  -90.3 TRUE  
## 6 1359575 8301 Westchester,  Dallas    TX    United St~  32.9  -96.8 TRUE

Plotting all the closed locations so that we do not recommend opening a Chipotle in a previously closed location.

# Build a leaflet map showing only the Chipotle stores flagged as closed
# Keep the rows where closed is TRUE and hand them to leaflet
closed_chipotles <- leaflet(filter(chipotle, closed))
# Draw the default OpenStreetMap base layer
closed_chipotles <- addTiles(closed_chipotles)
# Mark each closed store with a circle (leaflet infers the lon/lat columns)
closed_chipotles <- addCircles(closed_chipotles)
## Assuming "lon" and "lat" are longitude and latitude, respectively
# Display the closed-store map
print(closed_chipotles)

Counting the number of closed Chipotles

# Tally the rows of chipotle whose closed flag is TRUE
chipotle %>%
  filter(closed) %>%
  tally()
## # A tibble: 1 x 1
##       n
##   <int>
## 1    15
# Create chipotle_open: the stores that are not closed, without the closed flag
chipotle_open <- chipotle %>%
  # Keep only rows where closed is FALSE (rows with a missing flag are dropped)
  filter(!closed) %>%
  # Drop closed by name rather than by column position, so the result does
  # not depend on the number or order of columns in the CSV
  dplyr::select(-closed)

Where’s the closest Chipotle? Where aren’t there Chipotles (in the US)? By mapping all of the Chipotle locations on an interactive leaflet map, we can start to explore patterns in the geographic distribution of the chain’s locations.

Since there are thousands of store locations, many of which are clustered closely together, we will use a heatmap.

# Build a heatmap of the open stores, one leaflet layer at a time
chipotle_heatmap <- leaflet(chipotle_open)
# CartoDB base layer
chipotle_heatmap <- addProviderTiles(chipotle_heatmap, "CartoDB")
# Heat layer with a radius of 8 (leaflet infers the lon/lat columns)
chipotle_heatmap <- addHeatmap(chipotle_heatmap, radius = 8)
## Assuming "lon" and "lat" are longitude and latitude, respectively
# Display the heatmap
print(chipotle_heatmap)

Counting the number of Chipotles in each state

# Count the open US stores per state, with the largest counts first
chipotles_by_state <-
  chipotle_open %>%
  # Keep only Chipotles located in the United States
  filter(ctry == "United States") %>%
  # Count the stores for each value of st and sort the counts in
  # descending order (sort = TRUE orders by n, largest first)
  count(st, sort = TRUE)

# Print the state counts
chipotles_by_state
## # A tibble: 48 x 2
##    st        n
##    <chr> <int>
##  1 CA      417
##  2 TX      204
##  3 OH      177
##  4 FL      154
##  5 NY      142
##  6 IL      135
##  7 VA      101
##  8 MD       88
##  9 PA       85
## 10 AZ       80
## # ... with 38 more rows

The chipotles_by_state tibble has 48 rows, but there are 50 states in the US, so at least two states do not have a Chipotle.

# For each state abbreviation, report whether it appears in chipotles_by_state
is.element(state.abb, chipotles_by_state$st)
##  [1]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
## [12]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [23]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [34]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
## [45]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
# Negate the membership test to flag the states that are absent
!is.element(state.abb, chipotles_by_state$st)
##  [1] FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE FALSE FALSE
# Collect the abbreviations of the states that have no Chipotle
states_wo_chipotles <- setdiff(state.abb, chipotles_by_state$st)

# Show the states with no Chipotles
states_wo_chipotles
## [1] "AK" "HI" "SD"

Of the three states without a Chipotle (Alaska, Hawaii, and South Dakota), let’s focus on South Dakota, the only one in the contiguous United States. If we were to open a Chipotle location in South Dakota, how might we go about selecting proposed locations? In the following chunks of code, we look at several maps to explore how the location of current Chipotles as well as geographic, transportation, and governmental features of the state may inform this decision.

# Load the South Dakota population data.
# The path is relative to the project directory, like the other
# `datasets/` reads in this file, instead of one user's absolute path.
south_dakota_pop <- readRDS("datasets/south_dakota_pop.rds")

# Create a color palette to color the map by county population estimate
pal <- colorNumeric(palette = "viridis", domain = south_dakota_pop$estimate)

sd_pop_map <-
  south_dakota_pop %>%
  leaflet() %>%
  addProviderTiles("CartoDB") %>%
  # Add county polygons without outlines, filled by population estimate
  # and labelled with the NAME column
  addPolygons(
    stroke = FALSE,
    fillOpacity = 0.7,
    color = ~ pal(estimate),
    label = ~ NAME
  ) %>%
  # Add a legend that maps the palette back to the estimate values
  addLegend(pal = pal, values = ~estimate, title = "Population")

# Print map of South Dakota population by county
print(sd_pop_map)

Minnehaha and Pennington counties really stand out on the population map. These counties are home to Sioux Falls and Rapid City, respectively. Sioux Falls has a larger population, but Rapid City is close to Badlands National Park, which has a million visitors a year. Additionally, we should note that I-90, a major interstate in America, runs through both cities.

# Read the proposed South Dakota locations from chipotle_sd_locations.csv
chipotle_sd_locations <- read_csv(
  file = "datasets/chipotle_sd_locations.csv"
)
## Parsed with column specification:
## cols(
##   city = col_character(),
##   st = col_character(),
##   lat = col_double(),
##   lon = col_double(),
##   status = col_character()
## )
# Limit the open-store data to the states bordering South Dakota.
# All six neighbours are listed: IA, MN, MT, ND, NE and WY.
chipotle_market_research <-
  chipotle_open %>%
  filter(st %in% c("IA", "MN", "MT", "ND", "NE", "WY")) %>%
  # Keep only the columns shared with chipotle_sd_locations
  dplyr::select(city, st, lat, lon) %>%
  # Tag these rows as existing stores
  mutate(status = "open") %>%
  # Append the proposed South Dakota locations to the open-store rows
  bind_rows(chipotle_sd_locations)

# Print the market research data
chipotle_market_research
## # A tibble: 80 x 5
##    city             st      lat   lon status
##    <chr>            <chr> <dbl> <dbl> <chr> 
##  1 Lincoln          NE     40.8 -96.6 open  
##  2 Brooklyn Park    MN     45.1 -93.4 open  
##  3 Eagan            MN     44.8 -93.2 open  
##  4 Champlin         MN     45.2 -93.4 open  
##  5 Woodbury         MN     44.9 -92.9 open  
##  6 Columbia Heights MN     45.1 -93.2 open  
##  7 Fargo            ND     46.9 -96.9 open  
##  8 Minnetonka       MN     44.9 -93.5 open  
##  9 Minneapolis      MN     45.0 -93.3 open  
## 10 Maple Grove      MN     45.1 -93.4 open  
## # ... with 70 more rows

Mapping our proposed Chipotle restaurants in Sioux Falls and Rapid City so we can quickly see how close they are to the nearest open location.

# Two-color palette: one color per status level ("open" and "proposed")
pal <- colorFactor(
  palette = c("Blue", "Red"),
  domain = c("open", "proposed")
)

# Build the map of open and proposed locations one layer at a time
sd_proposed_map <- leaflet(chipotle_market_research)
# Stamen Toner base layer
sd_proposed_map <- addProviderTiles(sd_proposed_map, providers$Stamen.Toner)
# One circle per location, colored by status through pal
sd_proposed_map <- addCircles(sd_proposed_map, color = ~pal(status))
# Unfilled 100-mile ring around each proposed location; the radius is given
# in meters, and there are approximately 1609.34 meters in a mile
sd_proposed_map <- addCircles(
  sd_proposed_map,
  data = chipotle_sd_locations,
  radius = (100 * 1609.34),
  color = ~pal(status),
  fill = FALSE
)
## Assuming "lon" and "lat" are longitude and latitude, respectively
## Assuming "lon" and "lat" are longitude and latitude, respectively
# Display the map of proposed locations
print(sd_proposed_map)

There are no Chipotles within a 100-mile radius of either Sioux Falls or Rapid City, so we could open one in either location.

# Read the Voronoi polygon data
polys <- readRDS("datasets/voronoi_polygons.rds")

# Start from the polygons on a CartoDB base layer
voronoi_map <- addProviderTiles(leaflet(polys), "CartoDB")
# Draw the Voronoi polygons, filled by status via pal, with thin black borders
voronoi_map <- addPolygons(
  voronoi_map,
  fillColor = ~pal(status),
  weight = 0.5,
  color = "black"
)
# Overlay a circle marker for each row of chipotle_sd_locations (the proposed
# South Dakota locations), labelled by city and colored by status
voronoi_map <- addCircleMarkers(
  voronoi_map,
  data = chipotle_sd_locations,
  label = ~city,
  color = ~pal(status)
)
## Assuming "lon" and "lat" are longitude and latitude, respectively
# Display the Voronoi map
print(voronoi_map)

#---------

# Record the recommendation: one row per candidate city, with a flag that is
# TRUE for the city where the new store should open
next_chipotle <- tribble(
  ~location,         ~open_new_store,
  "Rapid City, SD",  FALSE,
  "Sioux Falls, SD", TRUE
)