# Attach the tidyverse; the plots further down call ggplot() from it.
library(tidyverse)
# Airquality Assignment
Airquality Assignment
Load the library
Load the dataset into your global environment
# Copy the built-in airquality data frame into the global environment so
# that later statements can modify it in place.
data("airquality")
# Look at the structure of the data
View the data using the “head” function
# Print the first six rows; the recorded console output follows as `#>`
# comments so the call itself stays valid R.
head(airquality)
#>   Ozone Solar.R Wind Temp Month Day
#> 1    41     190  7.4   67     5   1
#> 2    36     118  8.0   72     5   2
#> 3    12     149 12.6   74     5   3
#> 4    18     313 11.5   62     5   4
#> 5    NA      NA 14.3   56     5   5
#> 6    28      NA 14.9   66     5   6
Calculate Summary Statistics
# Mean of Temp, then mean and variance of Wind. The recorded results are
# kept as `#>` comments rather than fused onto the calls.
mean(airquality$Temp)
#> [1] 77.88235
mean(airquality$Wind)
#> [1] 9.957516
var(airquality$Wind)
#> [1] 12.41154
Rename the Months from number to names
# Replace each month number with its name. The first assignment of a
# string turns the whole column into character, so the later comparisons
# against 6, 7, 8 and 9 match the remaining values as text.
airquality$Month[airquality$Month == 5] <- "May"
airquality$Month[airquality$Month == 6] <- "June"
airquality$Month[airquality$Month == 7] <- "July"
airquality$Month[airquality$Month == 8] <- "August"
airquality$Month[airquality$Month == 9] <- "September"
# Now look at the summary statistics of the dataset
# Summarise the Month column; the recorded output below shows that it is
# a character vector of length 153 at this point.
summary(airquality$Month)
#>    Length     Class      Mode
#>       153 character character
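Month is a categorical variable. R stores such a variable as a factor, and its distinct values are called levels; listing the levels explicitly keeps the months in calendar order instead of alphabetical order.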
# Convert Month to a factor with the levels given in calendar order, so
# plots and legends list May through September rather than sorting the
# names alphabetically.
airquality$Month <- factor(
  airquality$Month,
  levels = c("May", "June", "July", "August", "September")
)
# Plot 1: Create a histogram categorized by Month
Plot 1 Code
# Histogram of Temp filled by Month, with the per-month bars drawn on top
# of one another (position = "identity"). No bin settings are given, so
# drawing the plot emits the stat_bin() message recorded below.
p1 <- airquality |>
  ggplot(aes(x = Temp, fill = Month)) +
  geom_histogram(position = "identity") +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  ) +
  labs(
    x = "Monthly Temperatures from May - Sept",
    y = "Frequency of Temps",
    title = "Histogram of Monthly Temperatures from May - Sept, 1973",
    # The caption provides the data source.
    caption = "New York State Department of Conservation and the National Weather Service"
  )
# Plot 1 Output
p1
#> `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
Plot 2: Improve the histogram of Average Temperature by Month
Plot 2 Code
# Same histogram as Plot 1, made readable: semi-transparent fills
# (alpha = 0.4) so overlapping months stay visible, an explicit bin width
# of 3, and white bar outlines.
p2 <- airquality |>
  ggplot(aes(x = Temp, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.4,
    binwidth = 3,
    color = "white"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  ) +
  labs(
    x = "Monthly Temperatures from May - Sept",
    y = "Frequency of Temps",
    title = "Histogram of Monthly Temperatures from May - Sept, 1973",
    caption = "New York State Department of Conservation and the National Weather Service"
  )
# Plot 2 Output
p2
# Plot 3: Create side-by-side boxplots categorized by Month
# One boxplot of Temp per Month, filled by Month with the default
# discrete colour scale.
p3 <- airquality |>
  ggplot(aes(Month, Temp, fill = Month)) +
  labs(
    x = "Months from May through September",
    y = "Temperatures",
    title = "Side-by-Side Boxplot of Monthly Temperatures",
    caption = "New York State Department of Conservation and the National Weather Service"
  ) +
  geom_boxplot() +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
# Plot 3 Output
p3
# Plot 4: Side by Side Boxplots in Gray Scale
Plot 4 Code
# Same boxplots as Plot 3, but filled with a grey scale instead of the
# default colours.
p4 <- airquality |>
  ggplot(aes(Month, Temp, fill = Month)) +
  labs(
    # The x aesthetic is Month, so the axis is labelled as months (as in
    # Plot 3) rather than as temperatures.
    x = "Months from May through September",
    y = "Temperatures",
    title = "Side-by-Side Boxplot of Monthly Temperatures",
    caption = "New York State Department of Conservation and the National Weather Service"
  ) +
  geom_boxplot() +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
# Plot 4 Output
p4
# Plot 5:
My own plot
# Histogram of Wind filled by Month, using semi-transparent bars and a
# bin width of 2. No position is given, so geom_histogram() uses its
# default placement for the per-month bars.
p5 <- airquality |>
  ggplot(aes(x = Wind, fill = Month)) +
  geom_histogram(alpha = 0.4, binwidth = 2) +
  labs(
    title = "Histogram of Wind Speed by Month",
    x = "Wind Speed",
    y = "Frecuencia"
  )
# Plot 5 Output
p5