Air Quality Assignment

Author

Shalanda Henderson

Use Tidyverse Package

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Load in Air Quality Dataset

# Copy the built-in dataset into the workspace. Naming the datasets package
# explicitly means every run starts from the unmodified data, even if an
# edited `airquality` object is already in the global environment.
airquality <- datasets::airquality

View Structure of the Data

# Preview the first six rows of the data.
head(airquality, n = 6L)
  Ozone Solar.R Wind Temp Month Day
1    41     190  7.4   67     5   1
2    36     118  8.0   72     5   2
3    12     149 12.6   74     5   3
4    18     313 11.5   62     5   4
5    NA      NA 14.3   56     5   5
6    28      NA 14.9   66     5   6

Calc Summary Stats - Mean

# Mean of the Temp column.
mean(airquality[["Temp"]])
[1] 77.88235

Calc Summary Stats - Median

# Median of the Temp column.
median(airquality[["Temp"]])
[1] 79

Calc Summary Stats - Standard Deviation

# Standard deviation of the Wind column.
with(airquality, sd(Wind))
[1] 3.523001

Calc Summary Stats - Variance

# Variance of the Wind column.
with(airquality, var(Wind))
[1] 12.41154

Convert Months from Numbers to Names (May - September)

# Replace each month number (5-9) with its name, one code at a time.
# The first replacement turns the column into character, so the remaining
# comparisons match the number against its text form ("6", "7", ...).
month_codes <- 5:9
month_labels <- c("May", "June", "July", "August", "September")
for (i in seq_along(month_codes)) {
  is_this_month <- airquality$Month == month_codes[i]
  airquality$Month[is_this_month] <- month_labels[i]
}

Reorder Months Chronologically

# Store Month as a factor whose levels follow calendar order, so plots and
# legends list the months chronologically rather than alphabetically.
month_order <- c("May", "June", "July", "August", "September")
airquality$Month <- factor(airquality$Month, levels = month_order)

Plot 1: Histogram categorized by Month

# Stacked histogram of temperature (20 bins), filled by month.
# Built with ggplot() because qplot() is deprecated as of ggplot2 3.4.0.
plot1 <- ggplot(airquality, aes(x = Temp, fill = Month)) +
  geom_histogram(bins = 20)
# Assigning the plot only stores it; naming the object on its own line is what
# prints (draws) it.
plot1

Plot 2: Histogram of Temperature by Month

# Overlaid, semi-transparent histograms of temperature, one per month.
plot2 <- ggplot(data = airquality, mapping = aes(x = Temp, fill = Month)) +
  geom_histogram(
    binwidth = 5,
    position = "identity",
    alpha = 0.5,
    color = "white"
  ) +
  # scale_fill_discrete() sets the legend title and the text shown next to
  # each fill colour.
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  ) +
  labs(x = "Temperature", y = "Frequency", title = "Monthly Temperatures")
# Draw the plot with the title centred; plot2 itself is stored without the
# theme tweak.
plot2 + theme(plot.title = element_text(hjust = 0.5))

Plot 3: Side-by-Side Boxplots of Temperature by Month

# Side-by-side boxplots of temperature, one box per month.
plot3 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  labs(
    # The x axis shows the month categories, so it is labelled "Month".
    x = "Month",
    y = "Temperatures",
    title = "Side-by-Side Boxplot of Monthly Temperatures",
    caption = "New York State Department of Conservation and the National Weather Service"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  ) +
  # Left-align the caption.
  theme(plot.caption = element_text(hjust = 0.0))
# Draw the plot with the gray background removed from the legend keys.
plot3 + theme(legend.key = element_blank())

Plot 4: Side-by-Side Boxplots - Gray Scale

# Side-by-side boxplots of temperature by month, filled in shades of gray.
plot4 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  labs(
    # The x axis shows the month categories, so it is labelled "Month".
    x = "Month",
    y = "Temperatures",
    title = "Side-by-Side Boxplot of Monthly Temperatures",
    caption = "New York State Department of Conservation and the National Weather Service"
  ) +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
# Draw the plot with the gray background removed from the legend keys.
plot4 + theme(legend.key = element_blank())

# Open the documentation page for the airquality dataset.
help("airquality")

Plot 5: Histogram of Monthly Average Wind Speeds

# Overlaid, semi-transparent histograms of wind speed, one per month.
plot5 <- airquality %>%
  ggplot(aes(x = Wind, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 5,
    color = "white"
  ) +
  # White panel background for the histogram.
  theme(panel.background = element_rect(fill = "white")) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  ) +
  labs(
    x = "Wind Speed",
    # A histogram's y axis is the number of observations per bin, so it is
    # labelled "Frequency" rather than "Average".
    y = "Frequency",
    title = "Monthly Average Wind Speed",
    caption = "Average wind speed in miles per hour at 0700 and 1000 hours at LaGuardia Airport"
  )
# Draw the plot with the title centred.
plot5 + theme(plot.title = element_text(hjust = 0.5))

#I created a histogram looking at the average wind speed by month. 
#I modified the code from Plot 2 to create Plot 5. 
#I also found code online that helped with other aesthetics 
#(I made comments within the code), such as changing the 
#background of the histogram to white, adding a caption, 
#and centering the title name.