AirQuality

Author

A Hart

Load the required packages

# Attach the packages used below. Each call sits on its own line because R
# cannot parse two statements on one line without a separator.
library(tidyverse)
library(ggplot2)

Load the dataset into Global environment

# Assign the airquality data frame to a variable of the same name in the
# current environment, so later modifications act on this copy.
airquality <- airquality

Use the Head function

# Preview the first rows to check the column names and spot missing values.
head(airquality)
  Ozone Solar.R Wind Temp Month Day
1    41     190  7.4   67     5   1
2    36     118  8.0   72     5   2
3    12     149 12.6   74     5   3
4    18     313 11.5   62     5   4
5    NA      NA 14.3   56     5   5
6    28      NA 14.9   66     5   6

Calculate Summary Statistics

# Mean temperature, selecting the column by name.
mean(airquality$Temp)
[1] 77.88235
# Same mean, selecting the column by position (Temp is the 4th column).
mean(airquality[,4])
[1] 77.88235

Calculate Median, Standard Deviation, and Variance

# Median temperature.
median(airquality$Temp)
[1] 79
# Standard deviation of wind speed.
sd(airquality$Wind)
[1] 3.523001
# Variance of wind speed (the square of the standard deviation above).
var(airquality$Wind)
[1] 12.41154

Change the Months from numbers to names

# Recode the numeric month codes (5-9) as month names using one lookup table.
# The codes are converted to character up front so the matching is explicit,
# rather than relying on the column silently becoming character after the
# first replacement.
month_lookup <- c(
  "5" = "May", "6" = "June", "7" = "July", "8" = "August", "9" = "September"
)
month_codes <- as.character(airquality$Month)
# Only rows that still hold a numeric code are replaced, so running this
# chunk a second time leaves the already-named months untouched.
is_coded <- month_codes %in% names(month_lookup)
airquality$Month[is_coded] <- unname(month_lookup[month_codes[is_coded]])

Summary statistics of the dataset

# Month now holds character strings, so summary() reports only its length,
# class and mode rather than numeric quartiles.
summary(airquality$Month)
   Length     Class      Mode 
      153 character character 

Reorder the Months

# Convert Month to a factor whose levels are listed in calendar order, so
# anything that follows level order shows the months chronologically.
airquality[["Month"]] <- factor(
  airquality[["Month"]],
  levels = c("May", "June", "July", "August", "September")
)

Plot 1

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
airquality <- airquality

# Histogram of temperature (20 bins) with bars filled by month. Built with
# ggplot() + geom_histogram() because qplot() is deprecated as of
# ggplot2 3.4.0.
p1 <- ggplot(airquality, aes(x = Temp, fill = Month)) +
  geom_histogram(bins = 20)
p1

Plot 2

library(tidyverse)
library(ggplot2)
airquality <- airquality

# Overlaid, semi-transparent temperature histograms (5-degree bins), one per
# month, with white bar outlines to separate adjacent bins.
p2 <- ggplot(data = airquality, mapping = aes(x = Temp, fill = Month)) +
  geom_histogram(
    binwidth = 5,
    position = "identity",
    alpha = 0.5,
    color = "white"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  ) +
  labs(
    x = "Monthly Temperatures",
    y = "Frequency",
    title = "Histogram of Monthly Temperatures"
  )
p2

Plot 3

library(tidyverse)
library(ggplot2)
airquality <- airquality

# Side-by-side boxplots of temperature, one box per month, filled by month.
# The x axis maps Month, so it is labelled "Month" (Temp is on the y axis).
p3 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  labs(
    x = "Month",
    y = "Temperatures",
    title = "Side-by-Side Boxplot of Monthly Temperatures",
    caption = "New York State Department of Conservation and the National Weather Service"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p3

Plot 4

library(tidyverse)
library(ggplot2)
airquality <- airquality

# Side-by-side boxplots of temperature per month, using a greyscale fill.
# The x axis maps Month, so it is labelled "Month" (Temp is on the y axis).
p4 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  labs(
    x = "Month",
    y = "Temperatures",
    title = "Side-by-Side Boxplot of Monthly Temperatures",
    caption = "New York State Department of Conservation and the National Weather Service"
  ) +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p4

Plot 5

library(tidyverse)
library(ggplot2)
airquality <- airquality

# Boxplots of wind speed for each month, filled by month.
p5 <- ggplot(airquality, aes(x = Month, y = Wind, fill = Month)) +
  geom_boxplot() +
  labs(
    x = "Month",
    y = "Wind speed",
    title = "Boxplot of Monthly Wind Speeds",
    caption = "New York State Department of Conservation and the National Weather Service"
  )

p5

For the fifth plot, I made a boxplot of the wind speeds. The plot shows wind speeds over the five months; not much change is noted from month to month. Originally I made a histogram using wind instead of temp, but I ended up deciding that the boxplot was a better visualization. I also experimented with geom_bar, geom_col, and geom_line. With the boxplot, one can see the small month-to-month changes clearly and effectively.