Airquality Assignment

Author

Kepm

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data("airquality")
head(airquality)
  Ozone Solar.R Wind Temp Month Day
1    41     190  7.4   67     5   1
2    36     118  8.0   72     5   2
3    12     149 12.6   74     5   3
4    18     313 11.5   62     5   4
5    NA      NA 14.3   56     5   5
6    28      NA 14.9   66     5   6
mean(airquality$Temp)
[1] 77.88235
mean(airquality$Ozone)
[1] NA
mean(airquality$Wind)
[1] 9.957516
summary(airquality)
     Ozone           Solar.R           Wind             Temp      
 Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
 1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
 Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
 Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
 3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
 Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
 NA's   :37       NA's   :7                                       
     Month            Day      
 Min.   :5.000   Min.   : 1.0  
 1st Qu.:6.000   1st Qu.: 8.0  
 Median :7.000   Median :16.0  
 Mean   :6.993   Mean   :15.8  
 3rd Qu.:8.000   3rd Qu.:23.0  
 Max.   :9.000   Max.   :31.0  
                               
str(airquality)
'data.frame':   153 obs. of  6 variables:
 $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
 $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
 $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
 $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
 $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
 $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...
median(airquality$Wind)
[1] 9.7
median(airquality$Temp)
[1] 79
sd(airquality$Wind)
[1] 3.523001
var(airquality$Wind)
[1] 12.41154
airquality$Month[airquality$Month == 5]<- "May"
airquality$Month[airquality$Month == 6]<- "June"
airquality$Month[airquality$Month == 7]<- "July"
airquality$Month[airquality$Month == 8]<- "August"
airquality$Month[airquality$Month == 9]<- "September"
summary(airquality$Month)
   Length     Class      Mode 
      153 character character 
airquality$Month<-factor(airquality$Month, levels=c("May", "June","July", "August", "September"))

Plot 1 -Histogram categorized by Month

p1 <- airquality |>
  ggplot(aes(x=Temp, fill=Month)) +
  geom_histogram(position="identity")+
  scale_fill_discrete(name = "Month", 
                      labels = c("May", "June","July", "August", "September")) +
  labs(x = "Monthly Temperatures from May - Sept", 
       y = "Frequency of Temps",
       title = "Histogram of Monthly Temperatures from May - Sept, 1973",
       caption = "New York State Department of Conservation and the National Weather Service")  #provide the data source
p1
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Plot 2-Improving the histogram using ggplot.

p2 <- airquality |>
  ggplot(aes(x=Temp, fill=Month)) +
  geom_histogram(position="identity", alpha=0.5, binwidth = 5, color = "white")+
  scale_fill_discrete(name = "Month", labels = c("May", "June","July", "August", "September")) +
  labs(x = "Monthly Temperatures from May - Sept", 
       y = "Frequency of Temps",
       title = "Histogram of Monthly Temperatures from May - Sept, 1973",
       caption = "New York State Department of Conservation and the National Weather Service")
p2

Plot 3-Side-by-side boxplots categorized by Month

p3 <- airquality |>
  ggplot(aes(Month, Temp, fill = Month)) + 
  labs(x = "Months from May through September", y = "Temperatures", 
       title = "Side-by-Side Boxplot of Monthly Temperatures",
       caption = "New York State Department of Conservation and the National Weather Service") +
  geom_boxplot() +
  scale_fill_discrete(name = "Month", labels = c("May", "June","July", "August", "September"))
p3 

Plot 4 - Side by Side Boxplots in Gray Scale

p4 <- airquality |>
  ggplot(aes(Month, Temp, fill = Month)) + 
  labs(x = "Monthly Temperatures", y = "Temperatures", 
       title = "Side-by-Side Boxplot of Monthly Temperatures",
       caption = "New York State Department of Conservation and the National Weather Service") +
  geom_boxplot()+
  scale_fill_grey(name = "Month", labels = c("May", "June","July", "August", "September"))
p4

Plot 5 - Creating a plot by my own with different variable

p5 <- airquality |>
  ggplot(aes(x=Wind, fill=Month)) +
  geom_histogram(position="identity", alpha=0.5, binwidth = 5, color = "white")+
  scale_linetype_discrete(name = "Month", labels = c("May", "June","July", "August", "September")) +
  labs(x = "Monthly Average Wind Speed from May - Sept", 
       y = "Miles per hour",
       title = "Histogram of Monthly Average Wind Speed from May - Sept, 1973",
       caption = "New York State Department of Conservation and the National Weather Service")
p5

This histogram that I created is based in the data set: Daily air quality measurements in New York, from May to Sept 1973. The histogram displays data regarding Wind variable, in which the average miles per hour (mph) is the unit presented in the plot during the five months from May to September. In the plot is observed that June stands out with the higher average wind speed in miles per hour, being May and September months that also display higher rates of wind that reach 15 and 20 mph. The code I used to modify is x=Wind, labs(x=“Monthly Average Wind Speed from May-Sept” and y= “Miles per hour”) also the title was modified to “Histogram of Monthly Average Wind Speed from May - Sept, 1973”.