Load the tidyverse (the airquality dataset is built into R)
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.6 ✓ dplyr 1.0.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Structure of Data
# Show each column's type and its first few values.
str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : int 5 5 5 5 5 5 5 5 5 5 ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
# Mean temperature, extracting the column by name.
mean(airquality[["Temp"]])
## [1] 77.88235
# Same mean, extracting the fourth column by position.
mean(airquality[[4]])
## [1] 77.88235
Change months from integer codes to character names
# Replace each numeric month code with its English name. The first
# assignment coerces the whole column from integer to character, so the
# remaining codes are matched as strings ("6", "7", ...).
month_labels <- c(
  "5" = "May",
  "6" = "June",
  "7" = "July",
  "8" = "August",
  "9" = "September"
)
for (month_code in names(month_labels)) {
  is_month <- airquality$Month == month_code
  airquality$Month[is_month] <- month_labels[[month_code]]
}
Month is now stored as character instead of integer
# Re-inspect the structure to confirm that Month is now a character column.
str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : chr "May" "May" "May" "May" ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
Summary statistics of dataset
# Per-column summary: quartiles and mean for numeric columns (plus NA
# counts where values are missing); length/class/mode for character columns.
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Length:153 Min. : 1.0
## Class :character 1st Qu.: 8.0
## Mode :character Median :16.0
## Mean :15.8
## 3rd Qu.:23.0
## Max. :31.0
##
Reorder Months so they do not default to alphabetical order
# Convert Month to a factor with levels in calendar order; without explicit
# levels, factor() would sort them alphabetically.
month_order <- c("May", "June", "July", "August", "September")
airquality$Month <- factor(airquality$Month, levels = month_order)
Plot 1: Create histogram categorized by Month with qplot
# Stacked histogram of temperature (20 bins), filled by month.
# NOTE: qplot() is deprecated as of ggplot2 3.4.0; it is kept here because
# this example demonstrates the qplot interface specifically.
p1 <- qplot(
  x = Temp,
  data = airquality,
  fill = Month,
  geom = "histogram",
  bins = 20
)
p1

Plot 2: Make histogram using ggplot
# Overlaid histograms of temperature, one per month.
# position = "identity" draws every month's bars from the baseline instead
# of stacking them, and alpha = 0.5 keeps overlapping bars visible.
p2 <- ggplot(data = airquality, mapping = aes(x = Temp, fill = Month)) +
  geom_histogram(
    binwidth = 5,
    position = "identity",
    alpha = 0.5,
    color = "white"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p2

Plot 3: Create side-by-side boxplots categorized by Month
# Side-by-side boxplots of temperature, one box per month.
# The y axis shows the Temp values themselves, so it is labelled
# "Temperature"; a boxplot has no frequency axis.
p3 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  labs(title = "Temperatures", x = "Months", y = "Temperature") +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p3

Plot 4: Create side-by-side boxplots categorized by Month in greyscale
# Same side-by-side boxplots as a greyscale figure: scale_fill_grey() swaps
# the default colour palette for shades of grey.
# The y axis shows the Temp values themselves, so it is labelled
# "Temperature"; a boxplot has no frequency axis.
p4 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  labs(title = "Temperatures", x = "Months", y = "Temperature") +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p4

Plot 5: Scatter plot using ggplot
# Scatter plot of Ozone against Temp, coloured by Month.
# Rows with a missing Ozone value cannot be drawn, which is what triggers
# the "Removed ... rows containing missing values" warning.
airquality %>%
  ggplot(aes(x = Temp, y = Ozone, color = Month)) +
  geom_point()
## Warning: Removed 37 rows containing missing values (geom_point).
