Air Quality (HW week 2)

library(tidyverse)
library(knitr)

Load in the dataset

# Bring the airquality data frame into the workspace
data(airquality)

`head()` shows us the first 6 rows

# Preview the top of the data frame; n = 6L is head()'s default, spelled out
head(airquality, n = 6L)
  Ozone Solar.R Wind Temp Month Day
1    41     190  7.4   67     5   1
2    36     118  8.0   72     5   2
3    12     149 12.6   74     5   3
4    18     313 11.5   62     5   4
5    NA      NA 14.3   56     5   5
6    28      NA 14.9   66     5   6

Stats summary

# Mean of the Temp column, selected by name
mean(airquality$Temp)
[1] 77.88235
# Same mean, selecting the column by position (Temp is the 4th column)
mean(airquality[,4])
[1] 77.88235
# Median of Temp
median(airquality$Temp)
[1] 79
# Standard deviation of the Wind column
sd(airquality$Wind)
[1] 3.523001
# Variance of Wind (the square of the standard deviation above)
var(airquality$Wind)
[1] 12.41154

Renames months from number to name

# Recode the numeric month codes 5-9 as month names in a single pass.
# The column becomes a character vector; values that are not one of the
# codes are left as they are.
airquality$Month <- local({
  month_labels <- c(
    "5" = "May", "6" = "June", "7" = "July",
    "8" = "August", "9" = "September"
  )
  month_codes <- as.character(airquality$Month)
  is_coded <- month_codes %in% names(month_labels)
  month_codes[is_coded] <- unname(month_labels[month_codes[is_coded]])
  month_codes
})

Shows months are now strings instead of ints

# A character column is summarised by length/class/mode, not by quantiles
summary(airquality[["Month"]])
   Length     Class      Mode 
      153 character character 

Put the months in calendar order (the default factor order is alphabetical)

# Convert Month to a factor whose levels run May..September in calendar
# order; month.name[5:9] is exactly those five names.
airquality$Month <- factor(airquality$Month, levels = month.name[5:9])

Histogram by month

# First-pass histogram of Temp with one overlaid fill colour per month.
# No binwidth is supplied, so stat_bin() falls back to 30 bins and says so
# when the plot is drawn.
p1 <- ggplot(data = airquality, mapping = aes(x = Temp, fill = Month)) +
  geom_histogram(position = "identity") +
  labs(
    x = "Monthly Temperatures from May - Sept",
    y = "Frequency of Temps",
    title = "Histogram of Monthly Temperatures from May - Sept, 1973",
    # The caption credits the data source
    caption = "New York State Department of Conservation and the National Weather Service"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p1
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Better formatted histogram (transparency, outline)

# Refined histogram: half-transparent bars so overlapping months remain
# visible, bins 5 units of Temp wide, and a white outline around each bar.
p2 <- ggplot(data = airquality, mapping = aes(x = Temp, fill = Month)) +
  geom_histogram(
    binwidth = 5,
    alpha = 0.5,
    color = "white",
    position = "identity"
  ) +
  labs(
    x = "Monthly Temperatures from May - Sept",
    y = "Frequency of Temps",
    title = "Histogram of Monthly Temperatures from May - Sept, 1973",
    caption = "New York State Department of Conservation and the National Weather Service"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p2

Boxplots for each month

# One boxplot of Temp per month, filled by month and drawn side by side.
p3 <- ggplot(data = airquality, mapping = aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  ) +
  labs(
    x = "Months from May through September",
    y = "Temperatures",
    title = "Side-by-Side Boxplot of Monthly Temperatures",
    caption = "New York State Department of Conservation and the National Weather Service"
  )
p3

The same boxplots but greyscale

# Greyscale version of the per-month temperature boxplots: same mapping as
# the coloured plot, but the fill uses scale_fill_grey().
p4 <- airquality |>
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  labs(
    # The x axis carries Month, so it is labelled as months rather than
    # repeating the temperature label used on the y axis.
    x = "Months from May through September",
    y = "Temperatures",
    title = "Side-by-Side Boxplot of Monthly Temperatures",
    caption = "New York State Department of Conservation and the National Weather Service"
  ) +
  geom_boxplot() +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p4

Ozone level by temperature

# Scatterplot of Ozone against Temp, coloured by month, with one straight
# regression line (no confidence band) fitted per month.
p5 <- airquality |>
  ggplot(aes(x = Temp, y = Ozone, color = Month)) +
  labs(
    y = "Ozone level",
    x = "Temperature",
    title = "Scatterplot of Ozone levels by Temperature",
    caption = "New York State Department of Conservation and the National Weather Service"
  ) +
  geom_point(size = 2) +
  # ylim() sets scale limits: rows whose Ozone is missing or outside 0-150
  # are removed (with a warning) for both the points and the fitted lines.
  ylim(0, 150) +
  # The formula is given explicitly because ggplot2 reports the formula it
  # picked when the plot is drawn, not when it is built, so wrapping the
  # assignment in suppressMessages() could not silence that message.
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    linewidth = 0.4,
    alpha = 0.1
  ) +
  scale_color_manual(values = c("red", "orange", "green", "blue", "purple")) +
  theme(legend.position = "right")
p5
Warning: Removed 38 rows containing non-finite values (`stat_smooth()`).
Warning: Removed 38 rows containing missing values (`geom_point()`).
Warning: Removed 2 rows containing missing values (`geom_smooth()`).

The plot I chose to display is a scatterplot of ozone levels by temperature. The colors show which month each observation is from, and the lines are regression lines fitted separately for each month. The plot shows a positive correlation between temperature and ozone levels.