Descriptive information and summary statistics for the air quality data

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Take a working copy of the built-in airquality data set so later steps can
# modify it, then preview the first ten observations.
airquality <- datasets::airquality
head(airquality, n = 10)
##    Ozone Solar.R Wind Temp Month Day
## 1     41     190  7.4   67     5   1
## 2     36     118  8.0   72     5   2
## 3     12     149 12.6   74     5   3
## 4     18     313 11.5   62     5   4
## 5     NA      NA 14.3   56     5   5
## 6     28      NA 14.9   66     5   6
## 7     23     299  8.6   65     5   7
## 8     19      99 13.8   59     5   8
## 9      8      19 20.1   61     5   9
## 10    NA     194  8.6   69     5  10
# Per-column summary: quartiles, mean, and the number of NA values.
summary(airquality)
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##      Month            Day      
##  Min.   :5.000   Min.   : 1.0  
##  1st Qu.:6.000   1st Qu.: 8.0  
##  Median :7.000   Median :16.0  
##  Mean   :6.993   Mean   :15.8  
##  3rd Qu.:8.000   3rd Qu.:23.0  
##  Max.   :9.000   Max.   :31.0  
## 

Recode the Month values from the numbers 5-9 to the names May through September

# Recode the numeric month (5-9) to its English name with one vectorised
# lookup into the built-in `month.name` constant. This avoids comparing a
# column that has already been coerced to character against numbers.
# The is.numeric() guard makes the step a no-op if the chunk is re-run after
# Month has already been converted.
if (is.numeric(airquality$Month)) {
  airquality$Month <- month.name[airquality$Month]
}

A histogram of Temp by Month using qplot

# Convert Month to a factor with the levels in calendar order, so plots list
# the months chronologically rather than alphabetically.
airquality$Month <- factor(
  airquality$Month,
  levels = c("May", "June", "July", "August", "September")
)

# Histogram of Temp (20 bins), filled by Month, built with qplot().
p1 <- qplot(
  Temp,
  data = airquality,
  fill = Month,
  geom = "histogram",
  bins = 20
)
p1

# Overlaid, semi-transparent histograms of Temp (5-unit bins), one per Month,
# built with ggplot().
p2 <- ggplot(airquality, aes(x = Temp, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 5,
    color = "white"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  ) +
  ggtitle("A histogram Temp by Month using ggplot")
p2

# Boxplots of the Temp distribution within each Month, colour-filled by Month.
# The axis labels name the variables actually mapped: Month on x and Temp on
# y. A boxplot has no frequency axis, and the boxes show the spread of the
# individual observations rather than monthly averages.
p3 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  ) +
  ggtitle("Boxplots of Daily Temperature by Month") +
  xlab("Month") +
  ylab("Temperature (degrees F)")
p3

# Grey-scale version of the per-month Temp boxplots.
# The axis labels name the variables actually mapped: Month on x and Temp on
# y. A boxplot has no frequency axis.
p4 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  ) +
  ggtitle(
    "Side by Side Boxplots (grey-scale) of monthly Temperature Variations"
  ) +
  xlab("Month") +
  ylab("Temperature (degrees F)")
p4

# Scatterplot of Ozone (y) against Temp (x) with a fitted least-squares line;
# geom_smooth() draws its default confidence band around the line.
# Rows with a missing Ozone value are not filtered here, so ggplot2 drops
# them itself and reports that with warnings.
pscatter <- airquality %>%
  ggplot(aes(x = Temp, y = Ozone)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Ozone vs. Temperature with regression line (95% CI)") +
  xlab("Temperature (degrees F)") +
  ylab("Ozone (ppb)")
pscatter
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 37 rows containing non-finite values (stat_smooth).
## Warning: Removed 37 rows containing missing values (geom_point).

# Simple linear regression of Ozone on Temp. Rows with missing values are
# dropped by lm(); the summary output reports how many were deleted.
model <- lm(Ozone ~ Temp, data = airquality)

# Coefficient estimates, residual spread, R-squared, and the overall F-test.
summary(model)
## 
## Call:
## lm(formula = Ozone ~ Temp, data = airquality)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.729 -17.409  -0.587  11.306 118.271 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -146.9955    18.2872  -8.038 9.37e-13 ***
## Temp           2.4287     0.2331  10.418  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.71 on 114 degrees of freedom
##   (37 observations deleted due to missingness)
## Multiple R-squared:  0.4877, Adjusted R-squared:  0.4832 
## F-statistic: 108.5 on 1 and 114 DF,  p-value: < 2.2e-16

According to Dr. Shen, there is an association between temperature and ozone. Specifically, global climate change and specific geographical locations may bring about more heat waves during the summer, which could bring about a 70 to 100 percent increase in high ozone episodes. The scatterplot and regression model above suggest a moderately strong positive association between temperature and ozone (R^2 = 0.49). Also, the model estimates that each one-degree (Fahrenheit) increase in temperature is associated with an increase of about 2.4 parts per billion (ppb) in ozone. Source: https://news.harvard.edu/gazette/story/2016/04/the-complex-relationship-between-heat-and-ozone/