library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   0.8.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.6.2
## Warning: package 'stringr' was built under R version 3.6.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(viridis)
## Warning: package 'viridis' was built under R version 3.6.3
## Loading required package: viridisLite
# Print the data set as loaded, before any recoding.
airquality

# Replace the numeric month codes 5-9 with month names and store the result
# as a factor whose levels follow calendar order (so plots keep that order).
airquality$Month <- local({
  month_names <- c(
    "5" = "May",
    "6" = "June",
    "7" = "July",
    "8" = "August",
    "9" = "September"
  )
  raw_month <- as.character(airquality$Month)
  # Only touch values that are still numeric codes; anything else is left
  # as-is and handled by factor() below.
  is_code <- raw_month %in% names(month_names)
  raw_month[is_code] <- month_names[raw_month[is_code]]
  factor(raw_month, levels = unname(month_names))
})

Plot 1

# Stacked histogram of daily temperature (20 bins), filled by month with the
# discrete viridis palette. Built with ggplot() rather than qplot(): qplot()
# is deprecated in current ggplot2, and the other plots here use ggplot().
p1 <- ggplot(airquality, aes(x = Temp, fill = Month)) +
  geom_histogram(bins = 20) +
  scale_fill_viridis_d()
p1

Changed the color palette to be color-blind friendly. The viridis palette can sometimes be harsh for viewers with typical color vision, but it works well enough here.

Plot 2

# Overlaid temperature histograms, one semi-transparent layer per month.
p2 <- ggplot(data = airquality, mapping = aes(x = Temp, fill = Month)) +
  geom_histogram(
    binwidth = 5,
    position = "identity", # draw the months over each other, not stacked
    alpha = 0.5,           # keep overlapping bars visible through each other
    color = "white"        # white bar outlines
  ) +
  scale_fill_viridis_d(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p2

Plot 3

# Build a one-row data frame holding a "count = <n>" label for a vector.
#
# y           Vector whose length is reported in the label.
# upper_limit Reference height; the label's y position is 95% of this value.
#
# Returns a data.frame with columns `y` (label position) and `label` (text).
# NOTE(review): not called anywhere in the visible code; it looks like a
# summary helper for annotating boxplots with counts -- confirm before removing.
stat_box_data <- function(y, upper_limit = 200) {
  n_obs <- length(y)
  label_height <- 0.95 * upper_limit
  data.frame(
    y = label_height,
    label = paste("count =", n_obs)
  )
}

# Boxplots of daily temperature for each month (viridis fill), with each
# month's mean overlaid as a white-filled, black-outlined dot.
p3 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  # The y axis shows Temp itself, so it is labelled as temperature rather
  # than as a frequency.
  labs(title = "Temperatures", x = "Months", y = "Temperature") +
  geom_boxplot() +
  scale_fill_viridis_d(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  ) +
  # `fun.y` is the argument name in the ggplot2 3.2.1 this document loads.
  # NOTE(review): newer ggplot2 releases rename it to `fun` -- switch when
  # upgrading.
  stat_summary(
    fun.y = mean,
    geom = "point",
    shape = 21,
    color = "black",
    fill = "white",
    show.legend = FALSE
  )
p3

Added means to the boxplots. I used filled dots instead of the typical x because I couldn’t find one color that provided high enough contrast to be visible on all the colors in the plot.

Note that the stat_box_data function is something I lifted from Stack Exchange for an earlier project; I’m not 100% sure how it works, but it does work!

Plot 4

# Greyscale version of the monthly temperature boxplots, with each month's
# mean marked by a white cross (shape 4).
p4 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  # The y axis shows Temp itself, so it is labelled as temperature rather
  # than as a frequency.
  labs(title = "Temperatures", x = "Months", y = "Temperature") +
  geom_boxplot() +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  ) +
  # `fun.y` is the argument name in the ggplot2 3.2.1 this document loads.
  # NOTE(review): newer ggplot2 releases rename it to `fun` -- switch when
  # upgrading.
  stat_summary(
    fun.y = mean,
    geom = "point",
    shape = 4,
    color = "white",
    show.legend = FALSE
  )
p4

Fixed x axis label to “Months”. Changed the color of mean points to be visible.

I don’t like that the median line for May is not visible, but I don’t have any ideas for fixing it other than not using greyscale.

Plot 5

# Scatter plot of temperature against ozone, colored by month, with a
# straight-line (lm) fit drawn for each month and no confidence band.
# Rows with a missing Ozone value are dropped by ggplot2 with a warning.
p5 <- airquality %>%
  ggplot(aes(x = Ozone, y = Temp, color = Month)) +
  labs(
    x = "Ozone",
    y = "Temperature",
    title = "Ozone vs Temperature by Month"
  ) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
p5
## Warning: Removed 37 rows containing non-finite values (stat_smooth).
## Warning: Removed 37 rows containing missing values (geom_point).

There’s clearly some sort of correlation between temperature and ozone, but I don’t know why that might be. I’m not sure the relationship is actually linear, but I added the fitted lines anyway.