In this sesssion we’ll be covering:
plot()
plot(x, y)
x
is a numeric vector that will be the x-axis coordinates of the ploty
is a numeric vector (of the same length as x
) that will be the y-axis coordinates of the plotbenzene <- c(1.3, 4.5, 2.6, 3.4, 6.4)
day <- c(1, 2, 3, 4, 5)
plot(x = day, y = benzene)
However, more typically you will be plotting using some existing data that you’ve imported into a data table. We will be using the chicago_air
data frame to create our air quality graphs. Use the data()
function to load the data from the region5air
package.
#library(devtools)
#install_github("natebyers/region5air")
library(region5air)
data(chicago_air)
head(chicago_air)
## date ozone temp solar month weekday
## 1 2013-01-01 0.032 17 0.65 1 3
## 2 2013-01-02 0.020 15 0.61 1 4
## 3 2013-01-03 0.021 28 0.17 1 5
## 4 2013-01-04 0.028 18 0.62 1 6
## 5 2013-01-05 0.025 26 0.48 1 7
## 6 2013-01-06 0.026 36 0.47 1 1
We will start with some basic graphs of the Chicago ozone data and then work on some overlay plots. Then we will use the lattice
and ggplot2
packages to display the same data in alternative ways.
chicago_air$date <- as.Date(chicago_air$date)
xy.plot <- plot(chicago_air$temp, chicago_air$ozone) #Basic scatterplot
par(mfrow = c(1,2)) # Use par() to set up the plotting area. In this case we are telling it the plot area will be 1 row and 2 columns to accomodate our 2 graphs.
l.plot <- plot(chicago_air$date, chicago_air$ozone,
type='l',
pch = 16,
col = "purple",
lwd = 2.5,
xlab="Date",
ylab = 'Ozone (ppm)',
main = '2013 Chicago Ozone Data') # Line plot with labels and colorful lines. See plot? for all the graphing options.
l.plot2 <- plot(chicago_air$date[1:200], chicago_air$ozone[1:200],
type='l', xlab = "Date", ylab= 'Ozone (ppm)',
main = '2013 Chicago Ozone Data') # Subsetted plot
dev.off() # Must use dev.off to clear the plot area setting introduced by par()
## null device
## 1
b.plot <- barplot(height = chicago_air$ozone,
names.arg = chicago_air$date,
xlab="Date",
ylab="Ozone (ppm)",
main = '2013 Chicago Ozone Data')
abline(h=.070, col="red") # use abline() to add a line representing the NAAQS standard
# you can also make vertical lines, or an intercept and slope with this funciton
We will be using the ozone plot as a back drop for plotting the temperature data as points.
In order to add points to a bar plot, we’ll use the x locations that are found in the b.plot
variable.
op <- par(mar=c(5, 4, 4, 6) + 0.1) # Here we use par() to set the margins of the graphing area
par(new = TRUE) # Use par() to plot a new figure on top of the current plot
plot(x = b.plot, y = chicago_air$temp, xlab = "", ylab = "", pch = 16,
ylim = c(0.5*min(chicago_air$temp, na.rm = TRUE), 2*max(chicago_air$temp, na.rm = TRUE)),
axes = FALSE, col = "blue")
# add a label for the new axis
mtext("Air Temperature", side = 4, line=3, cex = par("cex.lab"), col = "blue")
Finally, we add a legend.
legend("topright", # places a legend at the appropriate place, can also use x,y coordinates
legend = c("Ozone","Temp"), # puts text in the legend
lty=c(1, 0), pch = c(NA, 19), # gives the legend appropriate symbols
col=c("black","blue")) # appropriate colors
# reset graphing parameters to original default settings
par(op)
lattice
is another very popular graphics package. For an introduction and examples see here and here. We’ll also use another package called latticeExtra
which will make it easier to add the second y-axis to our graph
Here’s a first attempt at a lattice
graph:
install.packages("lattice")
library(lattice)
install.packages("latticeExtra")
library(latticeExtra)
#We'll make a basic line graph first
ozone <- xyplot(ozone ~ date, data = chicago_air, type = "l",
ylab = "Ozone (ppm)")
plot(ozone)
Then we’ll create a bar graph with temperature overlaid so that it looks like our previous graphs
ozone.plot <- xyplot(ozone ~ date, data = chicago_air, type = "h",
ylab = "Ozone (ppm)")
plot(ozone.plot)
# a scatter plot of the saturation data
temp.plot <- xyplot(temp ~ date, data = chicago_air, type = "p", ylab = "Temp",
ylim = c(0, 2*max(chicago_air$temp, na.rm = TRUE)))
plot(temp.plot)
# display both on the same graph
doubleYScale(ozone.plot, temp.plot, add.axis= TRUE, add.ylab2 = TRUE, under = FALSE,
text = c( "Ozone (ppm)", "Temp (F)"), columns = 2, # legend info
type = c("l", "p")) #legend info
We can change the format of the plot to look like our previous plots by using the update()
function.
update(trellis.last.object(),
par.settings = simpleTheme(col = c("black", "red"), pch = c(NA, 16)))
The advantage of the lattice
package is that it allows you to easily split up the data by conditioning. We’ll create some variables that will allow us to split up our data by time periods. This is done using the factor function in R.
factor()
Remember when we were importing data using read.csv
and we set stringsAsFactors = FALSE
? Factors are variables in R that are treated as categorical variables or ‘levels’ of data. This can be helpful for graphing and statistical summaries. In this case we would like to create a factor variable for months and weekdays and plot our data by those time periods.
First we’ll make a generic factor variable to see how it works.
data <- c(1,2,2,3,1,2,3,3,1,2,3,3,1)
fdata <- factor(data)
fdata
## [1] 1 2 2 3 1 2 3 3 1 2 3 3 1
## Levels: 1 2 3
display.data <- factor(data, labels = c("I","II","III"))
display.data
## [1] I II II III I II III III I II III III I
## Levels: I II III
chicago_air$date <- as.Date(chicago_air$date)
chicago_air$month <- months(chicago_air$date) # converts our dates into their associated month just like the Months function in Excel.
chicago_air$month <- factor(chicago_air$month,
levels = c('January', 'February', 'March',
'April', 'May', 'June', 'July',
'August', 'September', 'October',
'November', 'December'))
# create a day-of-the-week variable and arrange the factors in the proper order. We can nest our weekdays function within the factor function to save steps.
chicago_air$weekday <- factor(weekdays(chicago_air$date),
levels=c('Sunday', 'Monday', 'Tuesday',
'Wednesday', 'Thursday', 'Friday',
'Saturday'))
Now we’ll create two lattice plots, save them as variables, and display them together using the doubleYScale()
function.
ozone.lat <- xyplot(ozone ~ weekday | month, data = chicago_air, type = "h",
ylab = "Ozone (ppm)", as.table = TRUE,
xlab = "2013", scales = list(x=list(rot=45)))
plot(ozone.lat)
temp.lat <- xyplot(temp ~ weekday | month, data = chicago_air, type = "p",
ylab = "Temp (deg F)", ylim = c(0, 1.2*max(chicago_air$temp,
na.rm = TRUE)), scales = list(x=list(rot=45)))
plot(temp.lat)
doubley <- doubleYScale(ozone.lat, temp.lat, add.ylab2 = TRUE, under = TRUE, text = c( "Ozone (ppm)", "Temp (deg F)"),
column = 2, type = c("l", "p"))
update(doubley, par.settings = simpleTheme(col = c("black", "red"),
pch = c(NA, 16)))
The ggplot2
package is also useful for displaying relationships between variables by conditioning. However, it’s not possible to add two y-axes to the same plot using ggplot2
. - It is worth it to spend some time going through the extensive online help documentation for this package found here. - We will dive into this package more in the intermediate training.
Before we can plot our chicago_air
data in ggplot2
we must reshape this “wide” data frame into a “long” format and plot it. You will learn much more about reshaping data in the intermediate training.
install.pacakges("reshape2")
library(reshape2)
chicago_air$date <- as.Date(chicago_air$date)
long.data <- melt(chicago_air, id.vars = c("date", "month","weekday"))
install.packages("ggplot2")
library(ggplot2)
# assign x variable, y variable, and the conditioning variable
ggplot(long.data, aes(x = date, y = value, color=factor(variable))) +
geom_point() +
facet_grid(variable ~ .,scales="free_y") + # break the plot up by the number of factors in the 'variable' column
ylab("") + xlab("Date") + # add labels
theme(strip.background = element_rect(fill="lemonchiffon1")) # set the color of the title panel for each facet
##Use geom_smooth to create a fitted line and the 95% confidence interval around the line
ggplot(chicago_air, aes(temp, ozone) ) +
geom_point() + geom_smooth(method=lm)
Getting label texts properly symbolized in a plot requires some intricate expressions. We have inclueded a table from Dr. David Carslaw’s openair
manual which has a table of commonly used air pollution text formats (pg. 54 of “The openair
manual” [version: January 2015])
Text required | Expression |
---|---|
NOx | ylab = expression(“NO”[X]) |
PM2.5 | ylab = expression(“PM”[2.5]) |
\(\mu\)g m-3 | ylab = expression(“(” * mu * “g m” ^-3 * “)”) |
PM10 (\(\mu\)g m-3 ) | ylab = expression(“PM”[10] * " (" * mu * “g m” ^-3 * “)”) |
Temperature (°C) | xlab = expression(“Temperature (” * degree * “C)”) |
library(ggplot2)
ggplot(data = chicago_air,
aes(x=month, y=ozone)) +
geom_bar(stat="identity") +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) # or "line"
ggplot(chicago_air, aes(month,ozone)) + geom_bar(stat="identity") +
facet_wrap(~ weekday) + theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
head(airdata)
## site data_status action_code datetime parameter
## 1 840170890005 0 10 20141231T0100-0600 44201
## 2 840170311601 0 10 20141231T0100-0600 44201
## 3 840170314002 0 10 20141231T0100-0600 44201
## 4 840170310001 0 10 20141231T0100-0600 44201
## 5 840171110001 0 10 20141231T0100-0600 44201
## 6 840170971007 0 10 20141231T0100-0600 44201
## duration frequency value unit qc poc lat lon GISDatum elev
## 1 60 0 0.022 7 0 1 42.04915 -88.27303 WGS84 229
## 2 60 0 0.021 7 0 1 41.66812 -87.99057 WGS84 226
## 3 60 0 0.018 7 0 1 41.85524 -87.75247 WGS84 184
## 4 60 0 0.021 7 0 1 41.67099 -87.73246 WGS84 188
## 5 60 0 0.023 7 0 1 42.22144 -88.24221 WGS84 235
## 6 60 0 0.026 7 0 1 42.46757 -87.81005 WGS84 178
## method_code mpc mpc_value uncertainty qualifiers
## 1 87 1 0.005 NA <NA>
## 2 87 1 0.005 NA <NA>
## 3 87 1 0.005 NA <NA>
## 4 87 1 0.005 NA <NA>
## 5 87 1 0.005 NA <NA>
## 6 87 1 0.005 NA <NA>
p <- ggplot(airdata, aes(site, value)) +
facet_grid(parameter ~ .,scales="free_y") # facet_grid controls the panels
p + geom_boxplot(aes(fill=factor(site))) + theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
We can use subset()
within the ggplot
function to look at only the ozone parameter.
p <- ggplot(subset(airdata, parameter=="44201"), aes(site, value))
p + geom_boxplot(aes(fill=factor(site))) + theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
There are a couple of ways that you can save plots to your computer.
png("ozoneplot.png", height = 480, width = 480) # this will save the plot to your working directory or whatever file path you prefer. Can use jpeg, bmp, and tiff formats, as well.
plot(chicago_air$date, chicago_air$ozone,
type='l',
pch = 16,
col = "purple",
lwd = 1.5,
xlab="Date",
ylab = 'Ozone (ppm)',
main = '2013 Chicago Ozone Data')
dev.off() # Need to use this function to get the plot to render to the file.
Now let’s try some exercises to test our understanding of plotting in R.