library(tidyverse)
# Airquality Homework
# Airquality Tutorial and Homework Assignment
data("airquality")
head(airquality)
#>   Ozone Solar.R Wind Temp Month Day
#> 1    41     190  7.4   67     5   1
#> 2    36     118  8.0   72     5   2
#> 3    12     149 12.6   74     5   3
#> 4    18     313 11.5   62     5   4
#> 5    NA      NA 14.3   56     5   5
#> 6    28      NA 14.9   66     5   6
mean(airquality$Temp)
#> [1] 77.88235
mean(airquality[,4])
#> [1] 77.88235
median(airquality$Temp)
#> [1] 79
sd(airquality$Wind)
#> [1] 3.523001
var(airquality$Wind)
#> [1] 12.41154
# Recode the numeric month codes in airquality$Month to month names, one
# month at a time, using logical subsetting on the column itself.
# Note: assigning a string into the numeric Month column coerces the whole
# column to character, so the comparisons below against 6, 7 and 8 rely on R
# coercing the number to a string before comparing. The statements must stay
# as separate, ordered assignments for that reason.
airquality$Month[airquality$Month == 5]<- "May"
airquality$Month[airquality$Month == 6]<- "June"
airquality$Month[airquality$Month == 7]<- "July"
airquality$Month[airquality$Month == 8]<- "August"
airquality$Month[airquality$Month == 9]<- "September"
summary(airquality$Month)
#>    Length     Class      Mode
#>       153 character character
airquality$Month<-factor(airquality$Month, levels=c("May", "June","July", "August", "September"))
# Plot 1: Create a histogram categorized by Month
# Overlaid histograms of Temp, filled by Month (position = "identity" draws
# the months on top of each other instead of stacking them).
# No binwidth is supplied, so geom_histogram() falls back to its default
# binning and reports that when the plot is printed.
p1 <- ggplot(data = airquality, mapping = aes(x = Temp, fill = Month)) +
  geom_histogram(position = "identity") +
  labs(
    x = "Monthly Temperatures from May - Sept",
    y = "Frequency of Temps",
    title = "Histogram of Monthly Temperatures from May - Sept, 1973",
    # The caption credits the data source.
    caption = "New York State Department of Conservation and the National Weather Service"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p1
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of Average Temperature by Month
# Same overlaid histogram of Temp by Month, tuned for readability:
# semi-transparent bars (alpha = 0.5) so overlapping months stay visible,
# 5-unit-wide bins, and white bar outlines.
p2 <- ggplot(airquality, aes(Temp, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 5,
    color = "white"
  ) +
  labs(
    x = "Monthly Temperatures from May - Sept",
    y = "Frequency of Temps",
    title = "Histogram of Monthly Temperatures from May - Sept, 1973",
    caption = "New York State Department of Conservation and the National Weather Service"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p2
# Plot 3: Create side-by-side boxplots categorized by Month
# One boxplot of Temp per Month, each box filled with that month's colour
# from the default discrete fill palette.
p3 <- ggplot(airquality, aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  ) +
  labs(
    x = "Months from May through September",
    y = "Temperatures",
    title = "Side-by-Side Boxplot of Monthly Temperatures",
    caption = "New York State Department of Conservation and the National Weather Service"
  )
p3
# Plot 4: Make the same side-by-side boxplots, but in grey-scale
# Side by Side Boxplots in Gray Scale
# Grey-scale version of the side-by-side boxplots: one box of Temp per Month,
# filled from a grey palette via scale_fill_grey().
# The x-axis maps Month, so it is labelled as months (matching the coloured
# boxplot `p3`) rather than as temperatures.
p4 <- airquality |>
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  ) +
  labs(
    x = "Months from May through September",
    y = "Temperatures",
    title = "Side-by-Side Boxplot of Monthly Temperatures",
    caption = "New York State Department of Conservation and the National Weather Service"
  )
p4
# Plot 5: Now make one new plot on your own that is meaningfully different
# from the four I have shown you. You can select any of the variables in this
# dataset. Be sure to explore the dataset to see which variables are included
# that we have not explored yet. You may create a scatterplot, histogram,
# boxplot, or something else.
# Scatter plot of Temp (y) against Wind (x), one point per row of airquality.
p5 <- ggplot(airquality, aes(Wind, Temp)) +
  geom_point() +
  labs(
    x = "Wind (mph)",
    y = "Temperature (degrees F)",
    title = "Scatter Plot of Wind vs Temperature",
    caption = "New York State Department of Conservation and the National Weather Service"
  )
p5
# Exploring the Relationship Between Wind and Temperature
# In the scatter plot titled “Scatter Plot of Wind vs Temperature”, I
# investigate the potential connection between wind speed and temperature in
# the “airquality” dataset. The x-axis represents wind speed in miles per
# hour, and the y-axis represents temperature in degrees Fahrenheit. The
# purpose of this visualization is to discover patterns or trends in how
# changes in wind speed relate to variations in temperature. Examining the
# scatter plot, we can see a weak negative correlation between wind speed and
# temperature: on days with higher wind speeds, temperatures tend to be lower.
# This is an association only; the plot by itself does not show that stronger
# wind causes the temperature to drop.