Load the tidyverse (the airquality dataset used below is not read from a file)

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages --------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## Warning: package 'ggplot2' was built under R version 3.6.3
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts ------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Show the data

Look at the structure of the data

# Compact overview of airquality: dimensions, column types and leading values.
# The data frame is not created anywhere in this file; presumably it is the
# copy shipped in R's datasets package.
str(airquality)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...

Calculating Summary Statistics

# Arithmetic mean of the Temp column (exact-name extraction with [[ ]]).
mean(airquality[["Temp"]])
## [1] 77.88235

Calculate the Median of Temp, and the Standard Deviation and Variance of Wind

# Middle value of Temp, then the spread of Wind (note: a different column).
median(airquality[["Temp"]])
## [1] 79
# Sample standard deviation of Wind.
sd(airquality[["Wind"]])
## [1] 3.523001
# Sample variance of Wind (the square of the value above).
var(airquality[["Wind"]])
## [1] 12.41154

Change the Months from 5 - 9 to May through September

# Recode the numeric month (5-9) as an ordered-for-display factor in a single,
# explicit step. Assigning a string into the integer column would silently
# turn the whole column into character, so the mapping is declared through
# factor(levels = , labels = ) instead of relying on that coercion.
# The guard keeps the statement safe to re-run: once Month is a factor the
# numeric levels no longer match and a second conversion would yield only NA.
if (!is.factor(airquality$Month)) {
  airquality$Month <- factor(
    airquality$Month,
    levels = 5:9,
    labels = c("May", "June", "July", "August", "September")
  )
}

Plot 1: Create a histogram categorized by Month with qplot

# Quick-plot histogram of Temp in 20 bins, with bars filled by Month.
p1 <- qplot(
  x = Temp,
  data = airquality,
  geom = "histogram",
  fill = Month,
  bins = 20
)
p1

Plot 2: Make a histogram using ggplot

# Histogram of Temp per month, drawn on top of each other
# (position = "identity") and made semi-transparent so overlapping bars
# remain visible; bins are 5 units wide with white outlines.
p2 <- ggplot(data = airquality, mapping = aes(x = Temp, fill = Month)) +
  geom_histogram(
    binwidth = 5,
    position = "identity",
    alpha = 0.5,
    colour = "white"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p2

Plot 3: Create side-by-side boxplots categorized by Month

# Side-by-side boxplots of Temp, one box per Month.
# Month is mapped to x and Temp to y, so the y axis is labelled with the
# measured quantity: a boxplot shows a distribution of values, not counts.
p3 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  ggtitle("Temperatures") +
  xlab("Months") +
  ylab("Temperature") +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p3

Plot 4: Make the same side-by-side boxplots, but in grey-scale

# Same side-by-side boxplots of Temp per Month, using a grey-scale fill.
# Axis labels follow the mappings: x is Month, y is Temp.
p4 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  ggtitle("Temperatures") +
  xlab("Months") +
  ylab("Temperature") +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p4

Plot 5: Scatterplot of Temperature against Solar Radiation, faceted by Month

# Number of missing values in Temp
sum(is.na(airquality[["Temp"]]))
## [1] 0
# Number of missing values in Solar.R
sum(is.na(airquality[["Solar.R"]]))
## [1] 7
# Overwrite every missing Solar.R with the median of the observed values.
# All three arguments are evaluated on the column as it was before the
# assignment, so the median is taken over the non-missing originals only.
airquality[["Solar.R"]] <- replace(
  airquality[["Solar.R"]],
  is.na(airquality[["Solar.R"]]),
  median(airquality[["Solar.R"]], na.rm = TRUE)
)
# Print the completed column to confirm no NA remains
airquality[["Solar.R"]]
##   [1] 190 118 149 313 205 205 299  99  19 194 205 256 290 274  65 334 307  78
##  [19] 322  44   8 320  25  92  66 266 205  13 252 223 279 286 287 242 186 220
##  [37] 264 127 273 291 323 259 250 148 332 322 191 284  37 120 137 150  59  91
##  [55] 250 135 127  47  98  31 138 269 248 236 101 175 314 276 267 272 175 139
##  [73] 264 175 291  48 260 274 285 187 220   7 258 295 294 223  81  82 213 275
##  [91] 253 254  83  24  77 205 205 205 255 229 207 222 137 192 273 157  64  71
## [109]  51 115 244 190 259  36 255 212 238 215 153 203 225 237 188 167 197 183
## [127] 189  95  92 252 220 230 259 236 259 238  24 112 237 224  27 238 201 238
## [145]  14 139  49  20 193 145 191 131 223
# Plot 5: Temp against Solar.R as points, one panel per Month laid out
# over two rows.
p5 <- ggplot(data = airquality, mapping = aes(x = Solar.R, y = Temp)) +
  geom_point() +
  labs(
    title = "Temperature and Solar Radiation each Month",
    x = "Solar Radiation",
    y = "Temperature"
  ) +
  facet_wrap(vars(Month), nrow = 2)
p5