UPDATED SPRING 2025
In this lesson students will learn how to create time series plots and maps with ggplot2.
Time series plots show how a variable (on the y-axis) changes over time (on the x-axis).
library(tidyverse)
# Read the Salem air-quality CSV from the course GitHub repository;
# the first row of the file supplies the column names.
salem <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/DATA151/main/Data/salemOR_AQI.csv",
  header = TRUE
)
#str(salem)
geom_line()
Let’s just try using `geom_line()`:
# First attempt at a time series: pm25 against date, drawn as a line.
# The date column has not been converted with as.Date() at this point.
salem %>%
  ggplot(aes(x = date, y = pm25)) +
  geom_line()
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
What’s wrong with this?
# Convert the date column to Date class, then redraw the same line plot.
salem[["date"]] <- as.Date(salem[["date"]])
salem %>%
  ggplot(aes(x = date, y = pm25)) +
  geom_line()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
We can do a little wrangling to add a column for air quality rating, as defined here:
https://aqicn.org/data-platform/register/
## AIR QUALITY
## a little wrangling
# Label every pm25 reading with its air-quality rating.
#   0-50    Good
#   51-100  Moderate
#   101-150 Unhealthy Sensitive (Unhealthy for Sensitive Groups)
#   151-200 Unhealthy
#   201-300 Very Unhealthy
#   > 300   Hazardous
# The bands are tested as continuous ranges, so a non-integer reading such as
# 50.5 is rated (Moderate) instead of falling through every band, which is
# what matching against integer sequences like 51:100 would do.
# Missing or negative readings get a true NA. case_when() is vectorized, so
# no per-element lapply() is needed.
salem <- salem %>%
  mutate(quality = case_when(
    is.na(pm25) | pm25 < 0 ~ NA_character_,
    pm25 <= 50 ~ "Good",
    pm25 <= 100 ~ "Moderate",
    pm25 <= 150 ~ "Unhealthy Sensitive", # Unhealthy for Sensitive Groups
    pm25 <= 200 ~ "Unhealthy",
    pm25 <= 300 ~ "Very Unhealthy",
    TRUE ~ "Hazardous"
  ))
Order the rating.
# Turn the rating into a factor whose levels run from best to worst air quality.
salem[["quality"]] <- factor(
  salem[["quality"]],
  levels = c(
    "Good",
    "Moderate",
    "Unhealthy Sensitive",
    "Unhealthy",
    "Very Unhealthy",
    "Hazardous"
  )
)
# One colour per rating level, listed in the same order as the levels above.
pal <- c(
  "forestgreen",
  "gold",
  "darkorange",
  "firebrick3",
  "purple3",
  "darkred"
)
## ADD POINTS
# Time series with a point per reading coloured by rating, plus the connecting line.
salem %>%
  ggplot(aes(x = date, y = pm25)) +
  geom_point(mapping = aes(color = quality)) +
  geom_line() +
  scale_color_manual(values = pal) +
  theme_minimal()
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
These data are in three separate files:
# Read the three coin files from the course GitHub repository.
coin_Bitcoin <- read_csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/DATA151/main/Data/coin_Bitcoin.csv"
)
coin_Dogecoin <- read_csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/DATA151/main/Data/coin_Dogecoin.csv"
)
coin_Ethereum <- read_csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/DATA151/main/Data/coin_Ethereum.csv"
)
# Stack the three tables row-wise, in the order Bitcoin, Dogecoin, Ethereum.
coinBind <- rbind(coin_Bitcoin, coin_Dogecoin, coin_Ethereum)
Since `Date` is already a date type variable we can go ahead and plot it. Here `color=Name` works as a grouping variable.
#str(coinBind)
# One Volume-over-Date line per coin; mapping color to Name also splits the lines by coin.
coinBind %>%
  ggplot(aes(x = Date, y = Volume, color = Name)) +
  geom_line()
# Read the AllTrails national park data from the course GitHub repository.
npark <- read_csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/DATA151/main/Data/AllTrails%20data%20-%20nationalpark.csv"
)
## Rows: 3313 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): name, area_name, city_name, state_name, country_name, _geoloc, rou...
## dbl (8): trail_id, popularity, length, elevation_gain, difficulty_rating, v...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#str(npark)
Group by state to create summaries for metrics within a state.
# Per-state summary: number of rows (trails), mean popularity and mean
# elevation gain, ignoring missing values in the two means.
stateNP <- summarise(
  group_by(npark, state_name),
  stateTrails = n(),
  avgPop = mean(popularity, na.rm = TRUE),
  avgElev = mean(elevation_gain, na.rm = TRUE)
)
maps Package
#install.packages("maps")
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
# Boundary points for the "state" map as a data frame, then a peek at the first rows.
states <- map_data(map = "state")
head(states, n = 6)
## long lat group order region subregion
## 1 -87.46201 30.38968 1 1 alabama <NA>
## 2 -87.48493 30.37249 1 2 alabama <NA>
## 3 -87.52503 30.37249 1 3 alabama <NA>
## 4 -87.53076 30.33239 1 4 alabama <NA>
## 5 -87.57087 30.32665 1 5 alabama <NA>
## 6 -87.58806 30.32665 1 6 alabama <NA>
Let’s investigate the data for Oregon.
# Keep only Oregon's boundary points and plot them as individual dots.
oregon <- filter(states, region == "oregon")
oregon %>%
  ggplot(aes(x = long, y = lat)) +
  geom_point()
These data allow us to play “connect the dots” to draw the shape of the state of Oregon.
Oh no, what happened?
## LINE
# Same points joined with geom_line().
oregon %>%
  ggplot(aes(x = long, y = lat)) +
  geom_line()
We need to tell R what order to connect the dots.
`geom_path()` connects the observations in the order in which they appear in the data. `geom_line()` connects them in order of the variable on the x axis.
## PATH
# Same points joined with geom_path().
oregon %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path()
We can actually think of geographies as generalized polygons!
## POLYGON
# Draw the outline as a filled polygon.
oregon %>%
  ggplot(aes(x = long, y = lat)) +
  geom_polygon(fill = "forestgreen")
When joining the data to the map we need to have the same variable name in both. Let’s rename the map data’s `region` column to `state_name` so that it matches the column in our summary.
## JOIN THE MAP AND THE DATA
#head(npark$state_name)
# Lower-case the state names so they match the lower-case names in the map data.
stateNP[["state_name"]] <- tolower(stateNP[["state_name"]])
#head(npark$state_name)
# Rename the map's region column to state_name, then attach the per-state
# summaries to every boundary point (the join key is picked up from the shared column).
stateNP_Map <- left_join(
  rename(states, state_name = region),
  stateNP
)
## Joining with `by = join_by(state_name)`
# Peek at the first rows of the joined map data.
head(stateNP_Map, n = 6)
## long lat group order state_name subregion stateTrails avgPop
## 1 -87.46201 30.38968 1 1 alabama <NA> NA NA
## 2 -87.48493 30.37249 1 2 alabama <NA> NA NA
## 3 -87.52503 30.37249 1 3 alabama <NA> NA NA
## 4 -87.53076 30.33239 1 4 alabama <NA> NA NA
## 5 -87.57087 30.32665 1 5 alabama <NA> NA NA
## 6 -87.58806 30.32665 1 6 alabama <NA> NA NA
## avgElev
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## OUR FIRST MAP!
#install.packages("mapproj")
library(mapproj)
# State polygons filled by trail count, with black borders, on a map projection.
ggplot(
  stateNP_Map,
  aes(x = long, y = lat, group = group, fill = stateTrails)
) +
  geom_polygon(color = "black") +
  theme_bw() +
  coord_map()
Viridis is a colorblind friendly color palette that can be used to create accessible heatmaps.
#install.packages("viridis")
library(viridis)
# Trail count per state, filled with the viridis colour scale.
ggplot(stateNP_Map, aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = stateTrails), color = "black") +
  theme_bw() +
  coord_map() +
  ggtitle("California has the MOST trails, but...") +
  scale_fill_viridis(option = "viridis", direction = 1)
# Average trail popularity per state, filled with the viridis colour scale.
ggplot(stateNP_Map, aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = avgPop), color = "black") +
  theme_bw() +
  coord_map() +
  ggtitle("..Oregon trails are the MOST popular") +
  scale_fill_viridis(option = "viridis", direction = 1)
Create maps to show the distribution of…