Structure of the Data

# Show the data frame's dimensions plus each column's type and first values.
str(airquality)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...

Summary statistics

# Mean temperature, selecting the column by name.
mean(airquality[["Temp"]])
## [1] 77.88235
# Same mean, selecting the fourth column (Temp) by position.
mean(airquality[, 4])
## [1] 77.88235

Median

# Median of the Temp column.
median(airquality$Temp)
## [1] 79

Standard Deviation and Variance

# Spread of the Wind column: standard deviation first, then variance.
with(airquality, sd(Wind))
## [1] 3.523001
with(airquality, var(Wind))
## [1] 12.41154

Change Months 5-9

# Recode the numeric month (5-9) as its English name with one vectorised
# lookup into the built-in `month.name` constant.
#
# The previous five sequential replacements only worked because the first
# assignment silently coerced the whole integer column to character, which
# turned the later `== 6`, `== 7`, ... tests into string comparisons.
#
# The is.numeric() guard keeps this step a no-op when it is re-run on data
# that has already been recoded, matching the earlier behaviour.
if (is.numeric(airquality$Month)) {
  airquality$Month <- month.name[airquality$Month]
}

Structure and summary after recoding Month

# Re-inspect the structure; Month is now a character column of month names.
str(airquality)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : chr  "May" "May" "May" "May" ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...
# Per-column summaries; the character Month column reports only its length,
# class, and mode rather than quantiles.
summary(airquality)
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##     Month                Day      
##  Length:153         Min.   : 1.0  
##  Class :character   1st Qu.: 8.0  
##  Mode  :character   Median :16.0  
##                     Mean   :15.8  
##                     3rd Qu.:23.0  
##                     Max.   :31.0  
## 

Convert Month to a factor with levels in calendar order

# Turn Month into a factor whose levels follow the calendar (May through
# September), so plots and legends list the months in that order.
airquality$Month <- factor(
  airquality$Month,
  levels = c("May", "June", "July", "August", "September")
)

Plot 1 - Histogram categorized by Month, qplot

# Histogram of Temp with 30 bins, bars filled by Month, built with qplot().
# NOTE(review): qplot() is deprecated as of ggplot2 3.4.0; consider
# ggplot() + geom_histogram() if this needs to run on current versions.
p1 <- qplot(
  x = Temp,
  data = airquality,
  geom = "histogram",
  fill = Month,
  bins = 30
)
p1

Plot 2 - Histogram with ggplot

# Overlaid, semi-transparent histograms of Temp (one per Month) with
# 5-unit-wide bins and black bar outlines.
p2 <- ggplot(airquality, aes(x = Temp, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 5,
    color = "black"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p2

Plot 3 - Boxplots categorized by Month

# One boxplot of Temp per Month, each box filled with that month's colour.
p3 <- ggplot(airquality, aes(Month, Temp, fill = Month)) +
  geom_boxplot() +
  labs(title = "Temperatures", x = "Month", y = "Temperature") +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p3

Plot 4 - Box plots in grey scale

# One boxplot of Temp per Month, using a grey-scale fill palette.
p4 <- ggplot(airquality, aes(Month, Temp, fill = Month)) +
  geom_boxplot() +
  labs(title = "Temperatures", x = "Month", y = "Temperature") +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p4

Plot 5 - (a) Ozone Levels vs Month

# One boxplot of Ozone per Month, each box filled with that month's colour.
# Rows with a missing Ozone value are dropped by ggplot2 with a warning.
p5 <- ggplot(airquality, aes(Month, Ozone, fill = Month)) +
  geom_boxplot() +
  labs(title = "Ozone", x = "Month", y = "Ozone") +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p5
## Warning: Removed 37 rows containing non-finite values (stat_boxplot).

Plot 5 - (b) Ozone Levels vs Wind Speed and Month

# Scatter plot of Ozone against Wind, with points coloured by Month.
#
# The points are drawn with the default geom_point() shape, which is
# coloured through the `colour` aesthetic and ignores `fill`. The legend
# name and labels are therefore set on the colour scale, and the unused
# `fill` mapping is dropped.
#
# NOTE: this reassigns `p5`, replacing the Ozone-by-Month boxplot stored
# under the same name; the name is kept so existing references still work.
# Rows with a missing Ozone value are dropped by ggplot2 with a warning.
p5 <- ggplot(airquality, aes(Wind, Ozone, color = Month)) +
  geom_point() +
  labs(title = "Ozone vs Wind and Month", x = "Wind", y = "Ozone") +
  scale_color_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p5
## Warning: Removed 37 rows containing missing values (geom_point).