Load the tidyverse and the airquality dataset (airquality ships with base R's datasets package, not with the tidyverse)
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Take a local, modifiable copy of the built-in dataset.
airquality <- datasets::airquality
See what the included airquality dataset looks like:
# Structure: print each column's name, type, and first few values.
str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : int 5 5 5 5 5 5 5 5 5 5 ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
#
# Variety: print per-column summary statistics (quartiles, mean, NA counts).
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
##
Remap months from integers to names:
# Turn the integer month codes into a factor, then rename its levels
# (in sorted code order) to the matching month abbreviations.
airquality[["Month"]] <- as.factor(airquality[["Month"]])
levels(airquality[["Month"]]) <- c("May", "Jun", "Jul", "Aug", "Sep")
Plot 1: Create a histogram categorized by Month with qplot
# Histogram of Temp in 20 bins, with bars filled by Month.
# NOTE(review): qplot() is deprecated in newer ggplot2 releases (3.4.0+);
# it is kept here because this plot is meant to demonstrate qplot.
p1 <- qplot(
  x = Temp,
  data = airquality,
  fill = Month,
  geom = "histogram",
  bins = 20
)
p1

Plot 2: Make a histogram using ggplot (instead of qplot)
# Overlaid per-month temperature histograms (5-degree bins, white outlines).
# position = "identity" draws every month's bars from the baseline on top of
# one another, so the fill must be semi-transparent: with alpha = 1 the
# months drawn later completely hide the bars behind them.
p2 <- airquality %>%
  ggplot(aes(x = Temp, fill = Month)) +
  geom_histogram(
    position = "identity", alpha = 0.5, binwidth = 5, color = "white"
  ) +
  scale_fill_discrete(name = "Month", labels = levels(airquality$Month))
p2

Plot 3: Create side-by-side boxplots categorized by Month
Note: the y-axis shows temperatures, not frequencies.
# Side-by-side boxplots of Temp, one box per Month, filled by Month.
# The y aesthetic is Temp, so the y-axis is labelled as temperatures
# (it was previously mislabelled "Frequency").
p3 <- airquality %>%
  ggplot(aes(Month, Temp, fill = Month)) +
  ggtitle("Temperatures") +
  xlab("Months") +
  ylab("Temperatures") +
  geom_boxplot() +
  scale_fill_discrete(name = "Month", labels = levels(airquality$Month))
p3

Plot 4: Make the same side-by-side boxplots, but in grey-scale
# Same per-month boxplots of Temp, drawn with a grey-scale fill palette.
# The x aesthetic is Month and the y aesthetic is Temp, so the axes are
# labelled accordingly (they were previously "Temperatures" / "Frequency").
p4 <- airquality %>%
  ggplot(aes(Month, Temp, fill = Month)) +
  ggtitle("Temperatures") +
  xlab("Months") +
  ylab("Temperatures") +
  geom_boxplot() +
  scale_fill_grey(name = "Month", labels = levels(airquality$Month))
p4

Plot 2a: Make a bar plot with ggplot, using fill color to show temperature
Open question: how can this be done with a continuous Temp instead of a factor?
Temp is negated before conversion to a factor so that temperatures rise from the bottom of each bar.
# Stacked bar chart of day counts per Month, with one fill color per
# distinct temperature value.
airq <- airquality
# Negating Temp before making it a factor reverses the level order; this is
# what makes temperatures rise from the bottom of each stacked bar.
airq$Temp <- as.factor(-airq$Temp)
p2a <- airq %>%
  ggplot(aes(x = Month, fill = Temp)) +
  geom_bar(stat = "count") +
  # The factor levels are the negated values ("-97", ..., "-56"); negate them
  # back so the legend shows the real temperatures instead of negative ones.
  scale_fill_discrete(
    name = "Temp",
    labels = as.character(-as.integer(levels(airq$Temp)))
  )
p2a

Plot 3a: Create temperature heat map categorized by Month
# Tile "heat map": one tile per observed (Month, Temp) pair, colored by Temp
# on a blue -> purple -> red scale centered on the median temperature and
# limited to the observed temperature range.
p3a <- ggplot(airquality, aes(x = Month, y = Temp)) +
  geom_tile(aes(fill = Temp)) +
  scale_fill_gradient2(
    low = "blue",
    mid = "purple",
    high = "red",
    midpoint = median(airquality$Temp),
    limits = range(airquality$Temp)
  ) +
  labs(title = "Temperatures", x = "Months", y = "Temperatures")
p3a

Plot 3b: Another attempt, adapted from Stack Overflow.
This works better than the previous attempts.
# Stacked bar chart of day counts per Month. Bars are split into one segment
# per temperature value (grouped by negated Temp) and filled on a continuous
# blue-to-red scale.
p3b <- ggplot(
  data = airquality,
  mapping = aes(x = Month, group = -Temp, fill = Temp)
) +
  geom_bar() +
  scale_fill_continuous(low = "blue", high = "red")
p3b
