# install.packages("tidyverse")
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.2 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Show the data
# Take a working copy of the built-in dataset directly from the datasets
# package. Naming the package makes the intent explicit and guarantees the
# script starts from the pristine data even if a modified `airquality` is
# still sitting in the workspace from an earlier run.
airquality <- datasets::airquality
# Look at the structure of the data
str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : int 5 5 5 5 5 5 5 5 5 5 ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
If you want to look at specific statistics, here are some variations on coding. Here are two different ways to calculate “mean”.
# Mean temperature, extracting the column by name
mean(airquality[["Temp"]])
## [1] 77.88235
# Same mean, extracting the column by position (Temp is the 4th column)
mean(airquality[, 4L])
## [1] 77.88235
# Median temperature
median(airquality[["Temp"]])
## [1] 79
# Spread of wind speed: standard deviation, then variance
sd(airquality[["Wind"]])
## [1] 3.523001
var(airquality[["Wind"]])
## [1] 12.41154
# Recode the numeric month codes as month names with one vectorised lookup
# into base R's `month.name` constant (month.name[5] is "May", ...,
# month.name[9] is "September"). This replaces five separate assignments that
# relied on the column silently changing from integer to character part-way
# through. Month codes are expected to be in 1..12.
# The guard makes the step safe to re-run: once Month is no longer numeric
# it is left untouched.
if (is.numeric(airquality$Month)) {
  airquality$Month <- month.name[airquality$Month]
}
# Month should now be a character column
str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : chr "May" "May" "May" "May" ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
# Per-column summary statistics
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Length:153 Min. : 1.0
## Class :character 1st Qu.: 8.0
## Mode :character Median :16.0
## Mean :15.8
## 3rd Qu.:23.0
## Max. :31.0
##
Reorder the Months so they do not default to alphabetical
# Convert Month to a factor whose levels follow calendar order, so that
# plots and legends list the months chronologically instead of alphabetically.
month_order <- c("May", "June", "July", "August", "September")
airquality$Month <- factor(airquality$Month, levels = month_order)
Qplot stands for “Quick-Plot” (in the ggplot2 package)
# Histogram of Temp with bars filled by Month, using 20 bins, built with
# ggplot2's qplot() shortcut.
# NOTE(review): qplot() is deprecated as of ggplot2 3.4.0; it still works but
# warns on newer versions. The ggplot() form used for the later plots is the
# long-term replacement.
p1 <- qplot(
  x = Temp,
  data = airquality,
  geom = "histogram",
  fill = Month,
  bins = 20
)
p1
## Plot 2: Make a histogram using ggplot
ggplot is more sophisticated than qplot, but still uses the ggplot2 package. Reorder the legend so that it is not in the default (alphabetical) order, but rather in the order in which the months occur. Outline the bars in white using the color = "white" argument.
# Overlaid histograms of Temp, one per Month: position = "identity" draws the
# bars on top of each other rather than stacking them, alpha makes them
# semi-transparent, and color = "white" outlines each bar.
p2 <- ggplot(data = airquality, mapping = aes(x = Temp, fill = Month)) +
  geom_histogram(
    binwidth = 5,
    alpha = 0.5,
    position = "identity",
    color = "white"
  ) +
  scale_fill_discrete(
    labels = c("May", "June", "July", "August", "September"),
    name = "Month"
  )
p2
## Plot 3: Create side-by-side boxplots categorized by Month
The fill = Month argument in the aesthetics fills each boxplot with a different color. scale_fill_discrete() makes the legend on the side for discrete color values.
# Side-by-side boxplots of temperature, one box per month, each filled with
# its own colour.
# Month is mapped to the x-axis and Temp to the y-axis, so the axis titles
# name those variables; a boxplot shows the distribution of the values, not
# frequencies.
p3 <- airquality %>%
  ggplot(aes(Month, Temp, fill = Month)) +
  ggtitle("Temperatures") +
  xlab("Month") +
  ylab("Temperature (degrees F)") +
  geom_boxplot() +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p3
## Plot 4: Make the same side-by-side boxplots, but in grey-scale
Use the scale_fill_grey command for the grey-scale legend, and again, use fill=Month in the aesthetics
# Side-by-side boxplots of temperature by month, filled with a grey-scale
# palette via scale_fill_grey().
# Month is mapped to the x-axis and Temp to the y-axis, so the axis titles
# name those variables; a boxplot shows the distribution of the values, not
# frequencies.
p4 <- airquality %>%
  ggplot(aes(Month, Temp, fill = Month)) +
  ggtitle("Monthly Temperature Variations") +
  xlab("Month") +
  ylab("Temperature (degrees F)") +
  geom_boxplot() +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p4
## Plot 5: Now make one plot on your own of any of the variables in this dataset. It may be a scatterplot, histogram, or boxplot.
Be sure to briefly describe the plot you have created and what code you used to make this modification.
# Overlaid, semi-transparent histograms of Temp, one per Month.
# No `labels` override is passed to scale_fill_discrete(): the legend entries
# then come from the Month values themselves, so every colour is labelled
# with the month it actually represents. Hard-coded labels are matched to the
# data purely by position and would relabel the months incorrectly if they
# did not list exactly the months present in the data.
# NOTE(review): this reassigns `p2`, replacing the plot object created for
# Plot 2; consider a distinct name if both plots are needed later.
p2 <- airquality %>%
  ggplot(aes(Temp, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 5,
    color = "white"
  ) +
  scale_fill_discrete(name = "Month")
p2
The plot I have created is a histogram. The variable I have plotted is temperature (Temp): the y-axis shows the count of days in each 5-degree temperature bin, and the bars are filled by month.