Because airquality is a pre-built dataset, we can write it to our data directory to store it for later use.
# install.packages("tidyverse")
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Inspect the structure of the built-in airquality data frame.
str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : int 5 5 5 5 5 5 5 5 5 5 ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
# Mean temperature, selecting the column by name.
mean(airquality[["Temp"]])
## [1] 77.88235
# Same mean, selecting the column by position (Temp is the 4th column).
mean(airquality[[4]])
## [1] 77.88235
# Median temperature.
median(airquality[["Temp"]])
## [1] 79
# Spread of the wind measurements: standard deviation, then variance.
sd(airquality[["Wind"]])
## [1] 3.523001
var(airquality[["Wind"]])
## [1] 12.41154
# Replace the numeric month codes 5-9 with month names. The first assignment
# turns the column into character; later comparisons still match because the
# number is compared against the remaining codes as text.
for (month_number in 5:9) {
  airquality$Month[airquality$Month == month_number] <- month.name[month_number]
}
# Confirm that Month is now a character column.
str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : chr "May" "May" "May" "May" ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
# Per-column summary of the recoded data frame.
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Length:153 Min. : 1.0
## Class :character 1st Qu.: 8.0
## Mode :character Median :16.0
## Mean :15.8
## 3rd Qu.:23.0
## Max. :31.0
##
Reorder the Months so they do not default to alphabetical
# Turn Month into a factor whose levels follow the calendar, so the months are
# not ordered alphabetically.
airquality$Month <- factor(
  airquality$Month,
  levels = c("May", "June", "July", "August", "September")
)
# Quick histogram of Temp with 20 bins, bars filled by Month.
p1 <- qplot(
  x = Temp,
  data = airquality,
  geom = "histogram",
  fill = Month,
  bins = 20
)
p1
ggplot() is more flexible than qplot(); both functions come from the ggplot2 package.
Order the legend chronologically by month rather than in the default alphabetical order.
Outline the bars in white using the color = "white" argument.
# Temperature histogram with one semi-transparent layer per month, bins of
# width 5 and white bar outlines. position = "identity" draws the months over
# each other instead of stacking them.
p2 <- ggplot(data = airquality, mapping = aes(x = Temp, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 5,
    colour = "white"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p2
Setting fill = Month in the aesthetics fills each boxplot with a different color.
scale_fill_discrete() sets the title and labels of the side legend for the discrete fill colors.
# Boxplots of Temp for each month, each box filled with its month's colour.
# The y axis carries the Temp values, so it is labelled "Temperature"; a
# boxplot does not display frequencies.
p3 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  ggtitle("Temperatures") +
  xlab("Months") +
  ylab("Temperature") +
  geom_boxplot() +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p3
Use the scale_fill_grey command for the grey-scale legend, and again, use fill=Month in the aesthetics
# Grey-scale version of the temperature-by-month boxplots.
# Axis labels follow the aes() mapping: Month is on x and Temp is on y, so the
# axes are labelled "Months" and "Temperature" (not "Temperatures" and
# "Frequency").
p4 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  ggtitle("Temperatures") +
  xlab("Months") +
  ylab("Temperature") +
  geom_boxplot() +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p4
# Quick histogram of Wind with 20 bins, bars filled by Month.
p5 <- qplot(
  x = Wind,
  data = airquality,
  geom = "histogram",
  fill = Month,
  bins = 20
)
p5
# Same quick histogram for Ozone.
p6 <- qplot(
  x = Ozone,
  data = airquality,
  geom = "histogram",
  fill = Month,
  bins = 20
)
p6
## Warning: Removed 37 rows containing non-finite values (stat_bin).
# Wind histogram with bars filled by day of month (20 bins).
# Day is a numeric column, and a numeric fill does not split histogram bars
# into groups, which is why the plain `fill = Day` attempt showed no colour.
# Wrapping it in factor() makes it categorical so every day gets its own fill;
# labs() keeps the legend title as "Day".
p7 <- qplot(data = airquality, Wind, fill = factor(Day), geom = "histogram", bins = 20) +
  labs(fill = "Day")
p7
# Same plot with 100 narrower bins for a finer view of the distribution.
p8 <- qplot(data = airquality, Wind, fill = factor(Day), geom = "histogram", bins = 100) +
  labs(fill = "Day")
p8
# Quick Ozone histogram again, this time with 50 bins instead of 20.
p9 <- qplot(
  x = Ozone,
  data = airquality,
  geom = "histogram",
  fill = Month,
  bins = 50
)
p9
## Warning: Removed 37 rows containing non-finite values (stat_bin).
# Ozone histogram built with ggplot(): one semi-transparent layer per month,
# bins of width 5, white bar outlines.
p10 <- ggplot(data = airquality, mapping = aes(x = Ozone, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 5,
    colour = "white"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p10
## Warning: Removed 37 rows containing non-finite values (stat_bin).
# Same Ozone histogram with a wider bin (10 instead of 5) for comparison
# with p10.
p11 <- ggplot(data = airquality, mapping = aes(x = Ozone, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 10,
    colour = "white"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p11
## Warning: Removed 37 rows containing non-finite values (stat_bin).
# Wind histogram by month with bins of width 10; compare with p13.
p12 <- ggplot(data = airquality, mapping = aes(x = Wind, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 10,
    colour = "white"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p12
# Wind histogram by month with bins of width 5; compare with p12.
p13 <- ggplot(data = airquality, mapping = aes(x = Wind, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 5,
    colour = "white"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p13
# Wind histogram by month with bins of width 0.5; compare with p12 and p13.
p14 <- ggplot(data = airquality, mapping = aes(x = Wind, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 0.5,
    colour = "white"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p14
# Boxplots of Ozone for each month, each box filled with its month's colour.
# The y axis carries the Ozone values, so it is labelled "Ozone" rather than
# "Frequency".
# NOTE: this assignment reuses the name p14, replacing the Wind histogram that
# was stored under the same name.
p14 <- airquality %>%
  ggplot(aes(x = Month, y = Ozone, fill = Month)) +
  ggtitle("Ozone") +
  xlab("Months") +
  ylab("Ozone") +
  geom_boxplot() +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p14
## Warning: Removed 37 rows containing non-finite values (stat_boxplot).
# Boxplots of Wind for each month, each box filled with its month's colour.
# The y axis carries the Wind values, so it is labelled "Wind" rather than
# "Frequency".
p15 <- airquality %>%
  ggplot(aes(x = Month, y = Wind, fill = Month)) +
  ggtitle("Wind") +
  xlab("Months") +
  ylab("Wind") +
  geom_boxplot() +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p15
# Grey-scale boxplots of Ozone for each month.
# The y axis carries the Ozone values, so it is labelled "Ozone" rather than
# "Frequency".
p16 <- airquality %>%
  ggplot(aes(x = Month, y = Ozone, fill = Month)) +
  ggtitle("Ozone") +
  xlab("Month") +
  ylab("Ozone") +
  geom_boxplot() +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p16
## Warning: Removed 37 rows containing non-finite values (stat_boxplot).
# Grey-scale boxplots of Wind for each month.
# The y axis carries the Wind values, so it is labelled "Wind" rather than
# "Frequency".
p17 <- airquality %>%
  ggplot(aes(x = Month, y = Wind, fill = Month)) +
  ggtitle("Wind") +
  xlab("Months") +
  ylab("Wind") +
  geom_boxplot() +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p17
# Quick Ozone histogram with 20 bins, bars filled by Month.
p18 <- qplot(
  x = Ozone,
  data = airquality,
  geom = "histogram",
  fill = Month,
  bins = 20
)
p18
## Warning: Removed 37 rows containing non-finite values (stat_bin).
# Ozone histogram with bars shaded by temperature band instead of month.
# Temp is numeric, while scale_fill_grey() is a discrete scale, so Temp is
# first cut into bands of width 10 (the breaks 50-100 cover the observed
# 56-97 range). The legend labels come from the bands themselves rather than
# from hand-typed values that do not correspond to the data.
p19 <- airquality %>%
  ggplot(aes(x = Ozone, fill = cut(Temp, breaks = seq(50, 100, by = 10)))) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 5,
    color = "white"
  ) +
  scale_fill_grey(name = "Temp")
p19
## Warning: Removed 37 rows containing non-finite values (stat_bin).
## Trying to overlay other variables than month, like temp with Ozone, or temp with Wind, etc.
# Scatterplot of Ozone (y axis) against Wind (x axis).
# The axis labels follow the aes() mapping; they were previously swapped.
# To add colour, map a variable inside aes(), e.g.
# aes(Wind, Ozone, colour = Month).
plot20 <- airquality %>%
  ggplot(aes(x = Wind, y = Ozone)) +
  geom_point() +
  xlab("Wind mph") +
  ylab("Ozone") +
  ggtitle("Scatterplot of Wind versus Ozone")
plot20
## Warning: Removed 37 rows containing missing values (geom_point).
# Scatterplot of Temp (y axis) against Wind (x axis).
# The x axis carries Wind, so it is labelled "Wind mph" rather than "Ozone".
# To add colour, map a variable inside aes(), e.g.
# aes(Wind, Temp, colour = Month).
# NOTE: this reuses the name plot20, replacing the Wind/Ozone scatterplot.
plot20 <- airquality %>%
  ggplot(aes(x = Wind, y = Temp)) +
  geom_point() +
  xlab("Wind mph") +
  ylab("Temperature") +
  ggtitle("Scatterplot of Wind versus Temperature")
plot20