#install.packages("tidyverse")
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Inspect the built-in airquality data frame: column types and first values.
utils::str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : int 5 5 5 5 5 5 5 5 5 5 ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
# Mean temperature, selecting the column by name.
mean(airquality[["Temp"]])
## [1] 77.88235
# Same mean, selecting the column by position (Temp is the 4th column).
mean(airquality[[4]])
## [1] 77.88235
# Median temperature.
median(airquality[["Temp"]])
## [1] 79
# Spread of wind speed: standard deviation, then variance (its square).
sd(airquality[["Wind"]])
## [1] 3.523001
var(airquality[["Wind"]])
## [1] 12.41154
# Recode the numeric month codes (5-9) as month names.
# `month.name` is base R's vector of English month names, so indexing it with
# the integer codes converts the whole column in one vectorised step instead
# of one masked assignment per month.
# The is.numeric() guard makes the step safe to re-run: once Month holds
# names it is left alone (indexing month.name by a name would yield NA).
if (is.numeric(airquality$Month)) {
  airquality$Month <- month.name[airquality$Month]
}
# Month is now a character column; the other columns are unchanged.
str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : chr "May" "May" "May" "May" ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
# Per-column summary; a character column only reports length/class/mode.
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Length:153 Min. : 1.0
## Class :character 1st Qu.: 8.0
## Mode :character Median :16.0
## Mean :15.8
## 3rd Qu.:23.0
## Max. :31.0
##
# Store Month as a factor whose levels follow calendar order, so that axes and
# legends list the months chronologically rather than alphabetically.
airquality$Month <- factor(
  airquality$Month,
  levels = c("May", "June", "July", "August", "September")
)
### Plot 1: Histogram of temperature, filled by month, using 20 bins.
# Built with ggplot() + geom_histogram() instead of qplot(): qplot() is
# deprecated as of ggplot2 3.4.0. The mapping and bin count are unchanged.
p1 <- ggplot(airquality, aes(x = Temp, fill = Month)) +
  geom_histogram(bins = 20)
p1
### Plot 2: Overlaid histograms of temperature by month, using ggplot().
# position = "identity" draws every month's bars from the baseline instead of
# stacking them; alpha = 0.5 keeps overlapping bars visible.
# color = "white" outlines the bars in white.
# The legend order follows the levels of the Month factor, which is what
# keeps it chronological rather than alphabetical. No `labels =` override is
# passed to the scale: labels are matched to levels by position only, so a
# hand-written vector would silently mislabel the legend if the level order
# ever changed.
p2 <- airquality %>%
  ggplot(aes(x = Temp, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 5,
    color = "white"
  ) +
  scale_fill_discrete(name = "Month")
p2
### Plot 3: Side-by-side boxplots of temperature, one box per month.
# fill = Month in the aesthetics gives each box its own color, and
# scale_fill_discrete() titles the matching legend.
# The y axis shows temperature (Temp), not a count, so it is labelled as
# such; the airquality help page gives the unit as degrees Fahrenheit.
# No `labels =` override is passed to the scale: labels are matched to the
# factor levels by position only and would mislabel if the order changed.
p3 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  ggtitle("Temperatures") +
  xlab("Months") +
  ylab("Temperature (degrees F)") +
  scale_fill_discrete(name = "Month")
p3
### Plot 4: The same side-by-side boxplots, drawn in grey-scale.
# scale_fill_grey() replaces the default palette with shades of grey; the
# boxes are still distinguished through fill = Month in the aesthetics.
# Axis labels match what is plotted: months on x, temperature on y (the
# airquality help page gives the unit as degrees Fahrenheit).
# No `labels =` override is passed to the scale: labels are matched to the
# factor levels by position only and would mislabel if the order changed.
p4 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  ggtitle("Monthly Temperature Variations") +
  xlab("Months") +
  ylab("Temperature (degrees F)") +
  scale_fill_grey(name = "Month")
p4
### Plot 5: The overlaid temperature histograms by month, in grey-scale.
# Same layers as the colored overlaid histogram (identity position,
# half-transparent bars, 5-unit bins, white outlines), with
# scale_fill_grey() supplying the palette.
# The legend order follows the levels of the Month factor. No `labels =`
# override is passed: labels are matched to levels by position only, so a
# hand-written vector would mislabel the legend if the order changed.
p5 <- airquality %>%
  ggplot(aes(x = Temp, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 5,
    color = "white"
  ) +
  scale_fill_grey(name = "Month")
p5