Calculating summary statistics
One way of calculating “mean”
# Mean of the Temp column, selecting the column by name.
mean(airquality[["Temp"]])
## [1] 77.88235
Another way of calculating “mean”
# Same mean, this time selecting the column by position (Temp is column 4).
mean(airquality[[4]])
## [1] 77.88235
Recoding the Month column from the numbers 5-9 to the names May through September
# Recode Month from its numeric code (5-9) to the English month name.
# month.name is base R's vector of month names, so indexing it by the month
# number recodes the whole column in one vectorised step instead of five
# separate assignments that rely on implicit number-to-string coercion.
# The is.numeric() guard keeps this chunk safe to re-run once Month has
# already become character.
# Month ends up as a plain character column, which sorts alphabetically;
# convert it to a factor with calendar-ordered levels wherever display order
# matters.
if (is.numeric(airquality$Month)) {
  airquality$Month <- month.name[airquality$Month]
}
str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : chr "May" "May" "May" "May" ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
Plot 1: Histogram Categorized by Month with qplot
# Plot 1: 20-bin histogram of Temp with the bars filled by Month, built with
# qplot(). The plot object is stored in p1 and then printed.
p1 <- qplot(
  x = Temp,
  data = airquality,
  geom = "histogram",
  fill = Month,
  bins = 20
)
p1

Plot 2: Histogram Using ggplot
# Plot 2: overlaid, semi-transparent histograms of Temp, one per month.
# Month holds the month names as text, and ggplot2 orders a character
# variable alphabetically (August, July, June, May, September). Passing
# calendar-ordered `labels =` to scale_fill_discrete() therefore relabelled
# the legend by position and captioned the wrong months (August's bars were
# shown as "May"). Turning Month into a factor with calendar-ordered levels
# fixes both the legend order and its captions, so the manual label override
# is no longer needed.
p2 <- airquality %>%
  transform(Month = factor(Month, levels = month.name[5:9])) %>%
  ggplot(aes(x = Temp, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 5,
    color = "white"
  ) +
  scale_fill_discrete(name = "Month")
p2

Plot 3: Side-by-Side Boxplots Categorized by Month
# Plot 3: side-by-side boxplots of Temp, one box per month.
# Month holds the month names as text, so ggplot2 would sort the boxes
# alphabetically, and the calendar-ordered `labels =` previously passed to
# scale_fill_discrete() were applied by position, captioning the legend with
# the wrong months. A factor with calendar-ordered levels puts the boxes in
# May-September order and makes the legend match the data.
# The y-axis shows Temp (not a count), so it is labelled as temperature
# rather than "Frequency".
p3 <- airquality %>%
  transform(Month = factor(Month, levels = month.name[5:9])) %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  ggtitle("Temperatures") +
  xlab("Months") +
  ylab("Temperature (degrees F)") +
  scale_fill_discrete(name = "Month")
p3

Plot 4: Same as previous Side-by-Side Boxplots, but in grey-scale
# Plot 4: the same per-month boxplots of Temp, drawn with a grey-scale fill.
# Month holds the month names as text, so ggplot2 would sort the boxes
# alphabetically, and the calendar-ordered `labels =` previously passed to
# scale_fill_grey() were applied by position, captioning the legend with the
# wrong months. A factor with calendar-ordered levels puts the boxes in
# May-September order and makes the legend match the data.
# Axis labels are corrected too: the x-axis is Month (it was labelled
# "Temperatures") and the y-axis is Temp (it was labelled "Frequency").
p4 <- airquality %>%
  transform(Month = factor(Month, levels = month.name[5:9])) %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  ggtitle("Temperatures") +
  xlab("Months") +
  ylab("Temperature (degrees F)") +
  scale_fill_grey(name = "Month")
p4

Plot 5: Scatterplot using plot
# Plot 5: base-graphics scatterplot of Wind against Temp.
plot(
  x = airquality[["Temp"]],
  y = airquality[["Wind"]],
  xlab = "Temperatures",
  ylab = "Wind",
  main = "Shows no strong correlation between Temperature and Wind"
)
# Overlay the least-squares line of Wind regressed on Temp, drawn in red.
abline(
  lm(Wind ~ Temp, data = airquality),
  col = "red"
)

# plot(airquality)  # left disabled

# ggplot2 version of the Temp/Wind scatterplot: points are coloured by Ozone,
# a red linear-model fit line is layered on top, and the black-and-white
# theme is applied. The plot is not assigned, so it prints immediately.
ggplot(airquality, aes(x = Temp, y = Wind, colour = Ozone)) +
  geom_point(size = 3) +
  labs(
    title = "Correlation between Temperature and Wind",
    x = "Temperature",
    y = "Wind"
  ) +
  geom_smooth(method = "lm", colour = "red") +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
