In this lesson students will learn how to create
library(tidyverse)
npark <- read_csv("https://raw.githubusercontent.com/kitadasmalley/DATA151/main/Data/AllTrails%20data%20-%20nationalpark.csv")
#str(npark)
Group by state to create summaries for metrics within a state.
stateNP<-npark%>%
group_by(state_name)%>%
summarise(stateTrails=n(),
avgPop=mean(popularity, na.rm=TRUE),
avgElev=mean(elevation_gain, na.rm=TRUE))
stateNP
## # A tibble: 30 × 4
## state_name stateTrails avgPop avgElev
## <chr> <int> <dbl> <dbl>
## 1 Alaska 29 6.20 386.
## 2 Arizona 174 8.48 727.
## 3 Arkansas 16 7.33 213.
## 4 California 707 8.37 863.
## 5 Colorado 262 12.0 504.
## 6 Florida 31 4.61 2.46
## 7 Georgia 1 4.73 36.9
## 8 Hawaii 35 6.12 391.
## 9 Indiana 16 5.10 37.1
## 10 Kentucky 28 5.80 171.
## # … with 20 more rows
maps
Package#install.packages("maps")
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
states<-map_data("state")
head(states)
## long lat group order region subregion
## 1 -87.46201 30.38968 1 1 alabama <NA>
## 2 -87.48493 30.37249 1 2 alabama <NA>
## 3 -87.52503 30.37249 1 3 alabama <NA>
## 4 -87.53076 30.33239 1 4 alabama <NA>
## 5 -87.57087 30.32665 1 5 alabama <NA>
## 6 -87.58806 30.32665 1 6 alabama <NA>
Let’s investigate the data for Oregon.
oregon<-states%>%
filter(region=="oregon")
ggplot(oregon, aes(x=long, y=lat))+
geom_point()
These data allow us to play “connect the dots” to draw the shape of the state of Oregon.
Oh no, what happened?
## LINE
ggplot(oregon, aes(x=long, y=lat))+
geom_line()
We need to tell R what order to connect the dots.
geom_path()
connects the observations in the order
in which they appear in the data.
geom_line()
connects them in order of the variable
on the x axis.
## PATH
ggplot(oregon, aes(x=long, y=lat))+
geom_path()
We can actually think of geographies as generalized polygons!
## POLYGON
ggplot(oregon, aes(x=long, y=lat))+
geom_polygon(fill="forestgreen")
When joining the data to the map we need to have the same variable
name in both. Let’s create a new column named
state_name
.
## JOIN THE MAP AND THE DATA
#head(npark$state_name)
stateNP$state_name<-tolower(stateNP$state_name)
#head(npark$state_name)
stateNP_Map<-states%>%
rename(state_name=region)%>%
left_join(stateNP)
## Joining, by = "state_name"
head(stateNP_Map)
## long lat group order state_name subregion stateTrails avgPop
## 1 -87.46201 30.38968 1 1 alabama <NA> NA NA
## 2 -87.48493 30.37249 1 2 alabama <NA> NA NA
## 3 -87.52503 30.37249 1 3 alabama <NA> NA NA
## 4 -87.53076 30.33239 1 4 alabama <NA> NA NA
## 5 -87.57087 30.32665 1 5 alabama <NA> NA NA
## 6 -87.58806 30.32665 1 6 alabama <NA> NA NA
## avgElev
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
Our first map with the standard color scheme.
## OUR FIRST MAP!
stateNP_Map%>%
ggplot(aes(x=long, y=lat, group=group, fill=stateTrails))+
geom_polygon( color="black")+
theme_bw()+
coord_map()
Viridis is a colorblind friendly color palette that can be used to create accessible heatmaps.
#install.packages("viridis")
library(viridis)
stateNP_Map%>%
ggplot(aes(x=long, y=lat, group = group)) +
geom_polygon(aes(fill = stateTrails),color="black")+
theme_bw()+
coord_map()+
ggtitle("California has the MOST trails, but...")+
scale_fill_viridis(option="viridis", direction = 1)
stateNP_Map%>%
ggplot(aes(x=long, y=lat, group = group)) +
geom_polygon(aes(fill = avgPop),color="black")+
theme_bw()+
coord_map()+
ggtitle("..Oregon trails are the MOST popular")+
scale_fill_viridis(option="viridis", direction = 1)
Create maps to show the distribution of…
Dutch Bros Coffee is a drive-through coffee chain headquartered in Grants Pass, Oregon, with company-owned and franchise locations throughout the United States.
These data represent a list of store locations. There are variables for city, state, and zipcode.
dutch<- read.csv("https://raw.githubusercontent.com/kitadasmalley/DATA151/main/Data/dutch_bros_locations.csv",
header=TRUE)
str(dutch)
## 'data.frame': 496 obs. of 3 variables:
## $ city : chr "Apache Junction" "Buckeye" "Bullhead City" "Casa Grande" ...
## $ state : chr "AZ" "AZ" "AZ" "AZ" ...
## $ zipcode: int 85120 85326 86442 85122 85331 85226 86326 86001 86004 85233 ...
We will also want to wrangle our data so that we count the number of locations in a given state.
dutchSt<-dutch%>%
group_by(state)%>%
summarise(n=n())%>%
rename(abbr=state)
head(dutchSt)
## # A tibble: 6 × 2
## abbr n
## <chr> <int>
## 1 AZ 61
## 2 CA 96
## 3 CO 30
## 4 ID 35
## 5 NM 7
## 6 NV 25
When joining the data to the map we need to have the same variable name in both. Let’s create a new column named state in our states data, that contains the state abbreviations, since the dutch data set uses state abbreviations.
## THERE IS INFORMATION ABOUT STATES BUILT INTO R
state.name
## [1] "Alabama" "Alaska" "Arizona" "Arkansas"
## [5] "California" "Colorado" "Connecticut" "Delaware"
## [9] "Florida" "Georgia" "Hawaii" "Idaho"
## [13] "Illinois" "Indiana" "Iowa" "Kansas"
## [17] "Kentucky" "Louisiana" "Maine" "Maryland"
## [21] "Massachusetts" "Michigan" "Minnesota" "Mississippi"
## [25] "Missouri" "Montana" "Nebraska" "Nevada"
## [29] "New Hampshire" "New Jersey" "New Mexico" "New York"
## [33] "North Carolina" "North Dakota" "Ohio" "Oklahoma"
## [37] "Oregon" "Pennsylvania" "Rhode Island" "South Carolina"
## [41] "South Dakota" "Tennessee" "Texas" "Utah"
## [45] "Vermont" "Virginia" "Washington" "West Virginia"
## [49] "Wisconsin" "Wyoming"
state.abb
## [1] "AL" "AK" "AZ" "AR" "CA" "CO" "CT" "DE" "FL" "GA" "HI" "ID" "IL" "IN" "IA"
## [16] "KS" "KY" "LA" "ME" "MD" "MA" "MI" "MN" "MS" "MO" "MT" "NE" "NV" "NH" "NJ"
## [31] "NM" "NY" "NC" "ND" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN" "TX" "UT" "VT"
## [46] "VA" "WA" "WV" "WI" "WY"
## ABBREVIATIONS
stNameAbb<-data.frame(state_name=state.name,
abbr=state.abb)
head(stNameAbb)
## state_name abbr
## 1 Alabama AL
## 2 Alaska AK
## 3 Arizona AZ
## 4 Arkansas AR
## 5 California CA
## 6 Colorado CO
## LOWER CASE
stNameAbb$state_name<-tolower(stNameAbb$state_name)
## JOIN
mapDutch<-states%>%
rename(state_name=region)%>%
left_join(stNameAbb)%>% #JOIN ON NAMES AND ABBREVIATIONS
left_join(dutchSt) # JOIN ON DUTCH DATA
## Joining, by = "state_name"
## Joining, by = "abbr"
Dutch Bros on the west coast.
#install.packages("viridis")
library(viridis)
mapDutch%>%
ggplot(aes(x=long, y=lat, group = group)) +
geom_polygon(aes(fill = n),color="black")+
theme_bw()+
coord_map()+
scale_fill_viridis(option="viridis", direction = 1)+
ggtitle("Oregon - Home of Dutch Bros")
#install.packages("plotly")
library(plotly)
p<-mapDutch%>%
ggplot(aes(x, y, group = group)) +
geom_polygon(aes(fill = n),color="black")+
theme_bw()+
coord_fixed()+
scale_fill_viridis(option="viridis", direction = 1)
ggplotly(p)
starbucks <- read_csv("https://raw.githubusercontent.com/kitadasmalley/DATA151/main/Data/starbucks_directory.csv")
## Rows: 25600 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Brand, Store Number, Store Name, Ownership Type, Street Address, C...
## dbl (2): Longitude, Latitude
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#str(starbucks)
Let’s look at the format of the column Country:
head(unique(starbucks$Country))
## [1] "AD" "AE" "AR" "AT" "AU" "AW"
length(unique(starbucks$Country))
## [1] 73
map_data
in ggplot2
# install.packages("maps")
library(maps)
# Load map data
world_map <- map_data("world")
A standard map with no adjustments.
world_map%>%
ggplot(aes(map_id = region)) +
geom_map(map = world_map)+
expand_limits(x = world_map$long, y = world_map$lat)
The Mercator projection retains shapes of counties but distorts ratios.
#install.packages("mapproj")
library(mapproj)
## MERCATOR
world_map%>%
ggplot(aes(map_id = region)) +
geom_map(map = world_map)+
expand_limits(x = world_map$long, y = world_map$lat)+
coord_map("mercator", xlim=c(-180,180))
The Mollweide projection preserves ratios.
## MOLLWEIDE
world_map%>%
ggplot(aes(map_id = region)) +
geom_map(map = world_map)+
expand_limits(x = world_map$long, y = world_map$lat)+
coord_map("mollweide", xlim=c(-180,180))
Let’s look at the map data. Note that region is used to specify country, but it has the whole name written out. This might be a problem for us because the Starbucks data uses a two-letter code of each country.
head(world_map)
## long lat group order region subregion
## 1 -69.89912 12.45200 1 1 Aruba <NA>
## 2 -69.89571 12.42300 1 2 Aruba <NA>
## 3 -69.94219 12.43853 1 3 Aruba <NA>
## 4 -70.00415 12.50049 1 4 Aruba <NA>
## 5 -70.06612 12.54697 1 5 Aruba <NA>
## 6 -70.05088 12.59707 1 6 Aruba <NA>
#install.packages("countrycode")
library(countrycode)
starStores<-starbucks%>%
group_by(Country)%>%
summarise(stores=n())%>%
mutate(region=countrycode(Country, origin = 'iso2c', destination = 'country.name'))%>%
arrange(desc(stores))
head(starStores)
## # A tibble: 6 × 3
## Country stores region
## <chr> <int> <chr>
## 1 US 13608 United States
## 2 CN 2734 China
## 3 CA 1468 Canada
## 4 JP 1237 Japan
## 5 KR 993 South Korea
## 6 GB 901 United Kingdom
worldCoffee<-world_map%>%
left_join(starStores, by="region")
We can use the ggthemes package for a map theme.
#install.packages("ggthemes")
library(ggthemes)
worldCoffee%>%
ggplot(aes(map_id = region, fill=stores)) +
geom_map(map = world_map) +
expand_limits(x = world_map$long, y = world_map$lat) +
coord_map("mollweide", xlim=c(-180,180))+
scale_fill_viridis(option="viridis", direction = 1)+
theme_map()
It turns out that only country name in maps that doesn’t use the whole country name is the United States. In the map packages the name is USA.
We can use the lapply function to apply any function to our data.
## USE LAPPLY TO RECODE
starStores<-starbucks%>%
group_by(Country)%>%
summarise(stores=n())%>%
mutate(region=as.character(lapply(Country, function(x){
if(x!="US"){
out=countrycode(x, origin = 'iso2c', destination = 'country.name')
}
if(x=="US"){
out="USA"
}
out})))
worldCoffee2<-world_map%>%
left_join(starStores, by="region")
Let’s try that again..
worldCoffee2%>%
ggplot(aes(map_id = region, fill=stores)) +
geom_map(map = world_map) +
expand_limits(x = world_map$long, y = world_map$lat) +
coord_map("mollweide", xlim=c(-180,180))+
scale_fill_viridis(option="viridis", direction = 1)+
theme_map()
Number of locations within the same city:
## CITIES
starCities<-starbucks%>%
group_by(Country, City)%>%
summarise(lat=mean(Latitude),
long=mean(Longitude),
cityStores=n())
## `summarise()` has grouped output by 'Country'. You can override using the
## `.groups` argument.
#str(worldCoffee2)
Add points layer to plots
## ADD POINTS
worldCoffee2%>%
ggplot() +
geom_map(map = world_map, aes(map_id = region, fill=stores)) +
geom_point(data=starCities, aes(x=long, y=lat, size=cityStores), alpha=.3)+
expand_limits(x = world_map$long, y = world_map$lat) +
coord_map("mollweide", xlim=c(-180,180))+
theme_map()+
scale_fill_viridis(option="viridis", direction = 1)+
guides(color = guide_legend(order=1),
size = guide_legend(order=2))+
theme(legend.position="bottom", legend.box = "vertical")
## Warning: Removed 1 rows containing missing values (geom_point).
The following data are available from the Oregon Health Authority
https://geo.maps.arcgis.com/apps/webappviewer/index.html?id=ea1c78a745c845d899a0184f3581a2ff
## FA24 DATA 502
## COUNTY LEVEL DATA
county<-read.csv("https://raw.githubusercontent.com/kitadasmalley/Teaching/refs/heads/main/ProjectData/countyImmRate.csv")
#str(county)
#View(county)
Get data at the county level. Pay attention to the variables.
## LOAD MAP DATA
or_counties <- map_data("county", "oregon") %>%
select(lon = long, lat, group, id = subregion)
head(or_counties)
## lon lat group id
## 1 -117.2042 44.30683 1 baker
## 2 -117.4907 44.30683 1 baker
## 3 -117.4907 44.38704 1 baker
## 4 -117.5366 44.42142 1 baker
## 5 -117.5709 44.42142 1 baker
## 6 -117.5996 44.43861 1 baker
The the tolower function to make strings lowercase.
## TO LOWER
county$County<-tolower(county$County)
orVac<-or_counties%>%
rename(County=id)%>%
left_join(county)
## Joining, by = "County"
Map of vaccine rates across Oregon counties.
library(viridis)
ggplot(orVac, aes(lon, lat, group = group))+
geom_polygon(aes(fill=All.vaccines), color="black")+
scale_fill_viridis(option="viridis", direction = 1)+
theme_minimal()+
coord_map()