library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data("airquality")
data("airquality")
#Mean
mean(airquality$Temp)
## [1] 77.88235
#Standard Deviation
sd(airquality$Wind)
## [1] 3.523001
#median
median(airquality$Temp)
## [1] 79
#Variance
var(airquality$Wind)
## [1] 12.41154
airquality$Month[airquality$Month == 5]<- "May"
airquality$Month[airquality$Month == 6]<- "June"
airquality$Month[airquality$Month == 7]<- "July"
airquality$Month[airquality$Month == 8]<- "August"
airquality$Month[airquality$Month == 9]<- "September"
p1 <- airquality |>
ggplot(aes(x=Temp, fill=Month)) +
geom_histogram(position="identity", alpha=0.5, binwidth = 5, color = "white")+
scale_fill_discrete(name = "Month",
labels = c("May", "June","July", "August", "September")) +
labs(x = "Monthly Temperatures from May - Sept",
y = "Frequency of Temps",
title = "Histogram of Monthly Temperatures from May - Sept, 1973",
caption = "New York State Department of Conservation and the National Weather Service") #provide the data source
p1
airquality$Month[airquality$Month == 5]<- "May"
airquality$Month[airquality$Month == 6]<- "June"
airquality$Month[airquality$Month == 7]<- "July"
airquality$Month[airquality$Month == 8]<- "August"
airquality$Month[airquality$Month == 9]<- "September"
p3 <- airquality |>
ggplot(aes(Month, Temp, fill = Month)) +
labs(x = "Months from May through September", y = "Temperatures",
title = "Side-by-Side Boxplot of Monthly Temperatures",
caption = "New York State Department of Conservation and the National Weather Service") +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = c("May", "June","July", "August", "September"))
p3
airquality$Month[airquality$Month == 5]<- "May"
airquality$Month[airquality$Month == 6]<- "June"
airquality$Month[airquality$Month == 7]<- "July"
airquality$Month[airquality$Month == 8]<- "August"
airquality$Month[airquality$Month == 9]<- "September"
p4 <- airquality |>
ggplot(aes(Month, Temp, fill = Month)) +
labs(x = "Monthly Temperatures", y = "Temperatures",
title = "Side-by-Side Boxplot of Monthly Temperatures",
caption = "New York State Department of Conservation and the National Weather Service") +
geom_boxplot()+
scale_fill_grey(name = "Month", labels = c("May", "June","July", "August", "September"))
p4
# Load necessary packages
library(ggplot2)
library(dplyr)
# Load the airquality dataset
data("airquality")
# Create a new date column
airquality$date <- as.Date(with(airquality, paste(2023, Month, Day, sep = "-")), "%Y-%m-%d")
# Verify the creation of the date column
str(airquality)
## 'data.frame': 153 obs. of 7 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : int 5 5 5 5 5 5 5 5 5 5 ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
## $ date : Date, format: "2023-05-01" "2023-05-02" ...
# Filter data for May
may_data <- airquality %>%
filter(Month == 5)
# Display the first few rows of the filtered data
print(head(may_data))
## Ozone Solar.R Wind Temp Month Day date
## 1 41 190 7.4 67 5 1 2023-05-01
## 2 36 118 8.0 72 5 2 2023-05-02
## 3 12 149 12.6 74 5 3 2023-05-03
## 4 18 313 11.5 62 5 4 2023-05-04
## 5 NA NA 14.3 56 5 5 2023-05-05
## 6 28 NA 14.9 66 5 6 2023-05-06
# Create the smooth line chart for May
ggplot(may_data, aes(x = date, y = Ozone)) +
geom_line(color = "blue") +
geom_point() +
geom_smooth(method = "loess", se = FALSE, color = "red") +
labs(title = "Ozone Levels in May", x = "Date", y = "Ozone Level (ppb)") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).