Data from Thinknum are used to find potential locations for new Chipotle restaurants. Thinknum tracks thousands of websites, capturing and indexing vast amounts of public data.
LOADING AND HAVING A LOOK AT THE DATA
#install.packages("dplyr")
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages ------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.4
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'readr' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'stringr' was built under R version 3.5.3
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts ---------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.5.3
library(leaflet.extras)
## Warning: package 'leaflet.extras' was built under R version 3.5.3
library(sf)
## Warning: package 'sf' was built under R version 3.5.3
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
# Read datasets/chipotle.csv into a tibble named chipotle using read_csv.
# A project-relative path is used (like the other relative dataset reads in
# this script) so the analysis does not depend on one user's home directory;
# run the script from the project root that contains the datasets/ folder.
chipotle <- read_csv("datasets/chipotle.csv")
## Parsed with column specification:
## cols(
## id = col_double(),
## street = col_character(),
## city = col_character(),
## st = col_character(),
## ctry = col_character(),
## lat = col_double(),
## lon = col_double(),
## closed = col_logical()
## )
# Show the first six rows of chipotle to check the parsed columns
chipotle %>%
  head(n = 6) %>%
  print()
## # A tibble: 6 x 8
## id street city st ctry lat lon closed
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <lgl>
## 1 1358023 121 N. La Cienega~ Los Ange~ <NA> United St~ 34.1 -118. TRUE
## 2 1358955 24369 Cedar Rd, Lyndhurst OH United St~ 41.5 -81.5 TRUE
## 3 1359012 1130 West Grove A~ Mesa <NA> United St~ 33.4 -112. TRUE
## 4 1359490 6316 Delmar, St. Louis MO United St~ 38.7 -90.3 TRUE
## 5 1359574 1464 St. Louis Ga~ St. Louis MO United St~ 38.6 -90.3 TRUE
## 6 1359575 8301 Westchester, Dallas TX United St~ 32.9 -96.8 TRUE
Plotting all the closed locations so that we do not recommend opening a Chipotle in a previously closed location.
# Build a leaflet map showing every Chipotle store that has closed
closed_chipotles <- chipotle %>%
  # keep only the rows where closed is TRUE
  filter(closed) %>%
  leaflet() %>%
  # default Open Street Map base tiles
  addTiles() %>%
  # one circle per closed store
  addCircles()
## Assuming "lon" and "lat" are longitude and latitude, respectively
# Display the map of closed stores
print(closed_chipotles)
Counting the number of closed Chipotles
# Tally how many stores in chipotle are flagged as closed
chipotle %>%
  filter(closed) %>%
  tally()
## # A tibble: 1 x 1
## n
## <int>
## 1 15
# Create chipotle_open, holding only the stores that are not closed
chipotle_open <- chipotle %>%
  filter(!closed) %>%
  # Drop the closed column by name rather than by position, so the result
  # stays correct if the columns of chipotle are reordered or extended
  dplyr::select(-closed)
Where’s the closest Chipotle? Where aren’t there Chipotles (in the US)? By mapping all of the Chipotle locations on an interactive leaflet map we can start to explore patterns in the geographic distribution of the chain’s locations.
Since there are thousands of store locations, many of which are clustered closely together, we will use a heatmap.
# Heatmap of every location in chipotle_open
chipotle_heatmap <- chipotle_open %>%
  leaflet() %>%
  # CartoDB provider tiles as the base layer
  addProviderTiles(provider = "CartoDB") %>%
  # heat layer drawn with a radius of 8
  addHeatmap(radius = 8)
## Assuming "lon" and "lat" are longitude and latitude, respectively
# Display the heatmap
print(chipotle_heatmap)
Counting the number of Chipotles in each state
# Number of chipotle_open stores per state, largest count first
chipotles_by_state <- chipotle_open %>%
  # restrict to stores whose ctry is "United States"
  filter(ctry == "United States") %>%
  # one row per st, with the count n sorted in descending order
  count(st, sort = TRUE)
# Show the per-state counts
chipotles_by_state
## # A tibble: 48 x 2
## st n
## <chr> <int>
## 1 CA 417
## 2 TX 204
## 3 OH 177
## 4 FL 154
## 5 NY 142
## 6 IL 135
## 7 VA 101
## 8 MD 88
## 9 PA 85
## 10 AZ 80
## # ... with 38 more rows
The chipotles_by_state tibble has 48 rows, but there are 50 states in the US, meaning there are at least 2 states which do not have a Chipotle.
# Use the %in% operator to test, for each abbreviation in state.abb, whether
# it appears in chipotles_by_state$st (TRUE = the state is present)
state.abb %in% chipotles_by_state$st
## [1] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [12] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [23] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [34] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
## [45] TRUE TRUE TRUE TRUE TRUE TRUE
# Use the %in% and ! operators to flag, for each abbreviation in state.abb,
# the states that do NOT appear in chipotles_by_state$st (TRUE = absent)
!(state.abb %in% chipotles_by_state$st)
## [1] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE FALSE FALSE
# Collect the state abbreviations that never appear in chipotles_by_state$st
states_wo_chipotles <- setdiff(state.abb, chipotles_by_state$st)
# Show the states with no Chipotles
states_wo_chipotles
## [1] "AK" "HI" "SD"
Let’s focus on the only state in the contiguous United States that does not have a Chipotle: South Dakota. If we were to open a Chipotle location in South Dakota, how might we go about selecting proposed locations? In the following chunks of code, we look at several maps to explore how the location of current Chipotles as well as geographic, transportation, and governmental features of the state may inform this decision.
# Load the South Dakota population data. A project-relative path is used
# (like the other relative dataset reads in this script) so the analysis does
# not depend on one user's home directory; run the script from the project
# root that contains the datasets/ folder.
south_dakota_pop <- readRDS("datasets/south_dakota_pop.rds")
# Create color palette to color map by county population estimate
pal <- colorNumeric(palette = "viridis", domain = south_dakota_pop$estimate)
sd_pop_map <-
  south_dakota_pop %>%
  leaflet() %>%
  addProviderTiles("CartoDB") %>%
  # Add county boundaries with addPolygons: no outline stroke, colored by the
  # population estimate and labelled with the NAME column
  addPolygons(stroke = FALSE, fillOpacity = 0.7, color = ~ pal(estimate), label = ~ NAME) %>%
  # Add a legend for the population palette using addLegend
  addLegend(pal = pal, values = ~estimate, title = "Population")
# Print map of South Dakota population by county
print(sd_pop_map)
Minnehaha and Pennington counties really stand out on the population map. These counties are home to Sioux Falls and Rapid City, respectively. Sioux Falls has a larger population, but Rapid City is close to Badlands National Park, which has a million visitors a year. Additionally, we should note that I-90, a major interstate in America, runs through both cities.
# Read the proposed South Dakota store locations from chipotle_sd_locations.csv
chipotle_sd_locations <- read_csv(file = "datasets/chipotle_sd_locations.csv")
## Parsed with column specification:
## cols(
## city = col_character(),
## st = col_character(),
## lat = col_double(),
## lon = col_double(),
## status = col_character()
## )
# Limit the open-store data to locations in the states bordering South Dakota.
# Iowa ("IA") is included: it shares a border with South Dakota and was
# missing from the original list of neighbouring states.
chipotle_market_research <-
  chipotle_open %>%
  filter(st %in% c("IA", "MN", "MT", "ND", "NE", "WY")) %>%
  # keep only the columns shared with chipotle_sd_locations
  dplyr::select(city, st, lat, lon) %>%
  # tag these rows so they can be told apart from the proposed locations
  mutate(status = "open") %>%
  # bind the data on proposed SD locations onto the open store data
  bind_rows(chipotle_sd_locations)
# print the market research data
chipotle_market_research
## # A tibble: 80 x 5
## city st lat lon status
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Lincoln NE 40.8 -96.6 open
## 2 Brooklyn Park MN 45.1 -93.4 open
## 3 Eagan MN 44.8 -93.2 open
## 4 Champlin MN 45.2 -93.4 open
## 5 Woodbury MN 44.9 -92.9 open
## 6 Columbia Heights MN 45.1 -93.2 open
## 7 Fargo ND 46.9 -96.9 open
## 8 Minnetonka MN 44.9 -93.5 open
## 9 Minneapolis MN 45.0 -93.3 open
## 10 Maple Grove MN 45.1 -93.4 open
## # ... with 70 more rows
Mapping our proposed Chipotle restaurants in Sioux Falls and Rapid City so we can quickly see how close they are to the nearest open location.
# Blue/red factor palette used to tell "open" and "proposed" stores apart
pal <- colorFactor(palette = c("Blue", "Red"), domain = c("open", "proposed"))
# Plot the open and proposed locations together
sd_proposed_map <- chipotle_market_research %>%
  leaflet() %>%
  # Stamen Toner base tiles
  addProviderTiles(provider = providers$Stamen.Toner) %>%
  # one circle per location, colored by status through pal
  addCircles(color = ~ pal(status)) %>%
  # unfilled ring with a 100 mi radius around each proposed location;
  # there are approximately 1609.34 meters in a mile
  addCircles(
    data = chipotle_sd_locations,
    radius = (100 * 1609.34),
    color = ~ pal(status),
    fill = FALSE
  )
## Assuming "lon" and "lat" are longitude and latitude, respectively
## Assuming "lon" and "lat" are longitude and latitude, respectively
# Display the map of proposed locations
print(sd_proposed_map)
There are no Chipotles within a 100-mile radius of either Sioux Falls or Rapid City, hence we could open one in either location.
# Load the Voronoi polygon data
polys <- readRDS("datasets/voronoi_polygons.rds")
voronoi_map <-
  polys %>%
  leaflet() %>%
  # Use the CartoDB provider tile
  addProviderTiles("CartoDB") %>%
  # Plot Voronoi polygons using addPolygons, filled by status via pal
  addPolygons(fillColor = ~pal(status), weight = 0.5, color = "black") %>%
  # Add proposed AND open locations as another layer. The combined
  # chipotle_market_research table is used here; chipotle_sd_locations
  # holds only the proposed stores, so the open stores were never drawn.
  addCircleMarkers(data = chipotle_market_research, label = ~city, color = ~pal(status))
## Assuming "lon" and "lat" are longitude and latitude, respectively
# Print the Voronoi map
print(voronoi_map)
#---------
# Recommendation: one row per candidate city, flagging where to open next
next_chipotle <- tribble(
  ~location,         ~open_new_store,
  "Rapid City, SD",  FALSE,
  "Sioux Falls, SD", TRUE
)