Part 2
# download data
ozone_2022 <- read.csv("daily_44201_2022.csv")
ozone_2023 <- read.csv("daily_44201_2023.csv")
SO2_2022 <- read.csv("daily_42401_2022.csv")
SO2_2023 <- read.csv("daily_42401_2023.csv")
CO_2022 <- read.csv("daily_42101_2022.csv")
CO_2023 <- read.csv("daily_42101_2023.csv")
Winds_2022 <- read.csv("daily_WIND_2022.csv")
Winds_2023 <- read.csv("daily_WIND_2023.csv")
Temp_2022 <- read.csv("daily_TEMP_2022.csv")
Temp_2023 <- read.csv("daily_TEMP_2023.csv")
# combine into data sets by year
weather1_2022 <- rbind(CO_2022,ozone_2022,SO2_2022,Winds_2022,Temp_2022)
weather1_2023 <- rbind(CO_2023,ozone_2023,SO2_2023,Winds_2023,Temp_2023)
str(weather1_2022)
## 'data.frame': 1666187 obs. of 29 variables:
## $ State.Code : int 1 1 1 1 1 1 1 1 1 1 ...
## $ County.Code : int 73 73 73 73 73 73 73 73 73 73 ...
## $ Site.Num : int 23 23 23 23 23 23 23 23 23 23 ...
## $ Parameter.Code : int 42101 42101 42101 42101 42101 42101 42101 42101 42101 42101 ...
## $ POC : int 2 2 2 2 2 2 2 2 2 2 ...
## $ Latitude : num 33.6 33.6 33.6 33.6 33.6 ...
## $ Longitude : num -86.8 -86.8 -86.8 -86.8 -86.8 ...
## $ Datum : chr "WGS84" "WGS84" "WGS84" "WGS84" ...
## $ Parameter.Name : chr "Carbon monoxide" "Carbon monoxide" "Carbon monoxide" "Carbon monoxide" ...
## $ Sample.Duration : chr "1 HOUR" "1 HOUR" "1 HOUR" "1 HOUR" ...
## $ Pollutant.Standard : chr "CO 1-hour 1971" "CO 1-hour 1971" "CO 1-hour 1971" "CO 1-hour 1971" ...
## $ Date.Local : chr "2022-01-01" "2022-01-02" "2022-01-03" "2022-01-04" ...
## $ Units.of.Measure : chr "Parts per million" "Parts per million" "Parts per million" "Parts per million" ...
## $ Event.Type : chr "None" "None" "None" "None" ...
## $ Observation.Count : int 24 24 10 9 15 24 24 24 24 10 ...
## $ Observation.Percent: num 100 100 42 38 63 100 100 100 100 42 ...
## $ Arithmetic.Mean : num 0.1 0.146 0.2 0.211 0.407 ...
## $ X1st.Max.Value : num 0.1 0.2 0.2 0.3 0.8 0.7 0.3 0.3 0.3 0.2 ...
## $ X1st.Max.Hour : int 0 13 0 17 18 0 17 11 17 0 ...
## $ AQI : int NA NA NA NA NA NA NA NA NA NA ...
## $ Method.Code : int 93 93 93 93 93 93 93 93 93 93 ...
## $ Method.Name : chr "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" ...
## $ Local.Site.Name : chr "North Birmingham" "North Birmingham" "North Birmingham" "North Birmingham" ...
## $ Address : chr "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." ...
## $ State.Name : chr "Alabama" "Alabama" "Alabama" "Alabama" ...
## $ County.Name : chr "Jefferson" "Jefferson" "Jefferson" "Jefferson" ...
## $ City.Name : chr "Birmingham" "Birmingham" "Birmingham" "Birmingham" ...
## $ CBSA.Name : chr "Birmingham-Hoover, AL" "Birmingham-Hoover, AL" "Birmingham-Hoover, AL" "Birmingham-Hoover, AL" ...
## $ Date.of.Last.Change: chr "2023-04-18" "2023-04-18" "2023-04-18" "2023-04-18" ...
str(weather1_2023)
## 'data.frame': 1634762 obs. of 29 variables:
## $ State.Code : int 1 1 1 1 1 1 1 1 1 1 ...
## $ County.Code : int 73 73 73 73 73 73 73 73 73 73 ...
## $ Site.Num : int 23 23 23 23 23 23 23 23 23 23 ...
## $ Parameter.Code : int 42101 42101 42101 42101 42101 42101 42101 42101 42101 42101 ...
## $ POC : int 2 2 2 2 2 2 2 2 2 2 ...
## $ Latitude : num 33.6 33.6 33.6 33.6 33.6 ...
## $ Longitude : num -86.8 -86.8 -86.8 -86.8 -86.8 ...
## $ Datum : chr "WGS84" "WGS84" "WGS84" "WGS84" ...
## $ Parameter.Name : chr "Carbon monoxide" "Carbon monoxide" "Carbon monoxide" "Carbon monoxide" ...
## $ Sample.Duration : chr "1 HOUR" "1 HOUR" "1 HOUR" "1 HOUR" ...
## $ Pollutant.Standard : chr "CO 1-hour 1971" "CO 1-hour 1971" "CO 1-hour 1971" "CO 1-hour 1971" ...
## $ Date.Local : chr "2023-01-01" "2023-01-02" "2023-01-03" "2023-01-04" ...
## $ Units.of.Measure : chr "Parts per million" "Parts per million" "Parts per million" "Parts per million" ...
## $ Event.Type : chr "None" "None" "None" "None" ...
## $ Observation.Count : int 24 24 24 24 13 21 24 24 24 24 ...
## $ Observation.Percent: num 100 100 100 100 54 88 100 100 100 100 ...
## $ Arithmetic.Mean : num 0.154 0.121 0.117 0.229 0.277 ...
## $ X1st.Max.Value : num 0.4 0.2 0.2 0.8 0.6 0.5 0.6 0.2 0.4 0.8 ...
## $ X1st.Max.Hour : int 23 0 12 22 0 18 1 17 19 8 ...
## $ AQI : int NA NA NA NA NA NA NA NA NA NA ...
## $ Method.Code : int 93 93 93 93 93 93 93 93 93 93 ...
## $ Method.Name : chr "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" ...
## $ Local.Site.Name : chr "North Birmingham" "North Birmingham" "North Birmingham" "North Birmingham" ...
## $ Address : chr "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." ...
## $ State.Name : chr "Alabama" "Alabama" "Alabama" "Alabama" ...
## $ County.Name : chr "Jefferson" "Jefferson" "Jefferson" "Jefferson" ...
## $ City.Name : chr "Birmingham" "Birmingham" "Birmingham" "Birmingham" ...
## $ CBSA.Name : chr "Birmingham-Hoover, AL" "Birmingham-Hoover, AL" "Birmingham-Hoover, AL" "Birmingham-Hoover, AL" ...
## $ Date.of.Last.Change: chr "2024-04-09" "2024-04-09" "2024-04-09" "2024-04-09" ...
# sort by SF-USD site name
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
weather_2022 <- weather1_2022 %>%
filter(Local.Site.Name == "SF-USD")
weather_2023 <- weather1_2023 %>%
filter(Local.Site.Name == "SF-USD")
weather <- rbind(weather_2022,weather_2023)
# check that we only have SF-USD sites
local.site_2022 <- as.factor(weather_2022$Local.Site.Name)
local.site_2023 <- as.factor(weather_2023$Local.Site.Name)
str(local.site_2022)
## Factor w/ 1 level "SF-USD": 1 1 1 1 1 1 1 1 1 1 ...
str(local.site_2023)
## Factor w/ 1 level "SF-USD": 1 1 1 1 1 1 1 1 1 1 ...
# check variable names
weather_2022$Parameter.Name <- as.factor(weather_2022$Parameter.Name)
weather_2023$Parameter.Name <- as.factor(weather_2023$Parameter.Name)
levels(weather_2022$Parameter.Name)
## [1] "Carbon monoxide" "Outdoor Temperature"
## [3] "Ozone" "Sulfur dioxide"
## [5] "Wind Direction - Resultant" "Wind Speed - Resultant"
levels(weather_2023$Parameter.Name)
## [1] "Carbon monoxide" "Outdoor Temperature"
## [3] "Ozone" "Sulfur dioxide"
## [5] "Wind Direction - Resultant" "Wind Speed - Resultant"
# show head of data
head(weather)
## State.Code County.Code Site.Num Parameter.Code POC Latitude Longitude Datum
## 1 46 99 9 42101 3 43.59901 -96.78331 WGS84
## 2 46 99 9 42101 3 43.59901 -96.78331 WGS84
## 3 46 99 9 42101 3 43.59901 -96.78331 WGS84
## 4 46 99 9 42101 3 43.59901 -96.78331 WGS84
## 5 46 99 9 42101 3 43.59901 -96.78331 WGS84
## 6 46 99 9 42101 3 43.59901 -96.78331 WGS84
## Parameter.Name Sample.Duration Pollutant.Standard Date.Local
## 1 Carbon monoxide 1 HOUR CO 1-hour 1971 2022-01-06
## 2 Carbon monoxide 1 HOUR CO 1-hour 1971 2022-01-07
## 3 Carbon monoxide 1 HOUR CO 1-hour 1971 2022-01-08
## 4 Carbon monoxide 1 HOUR CO 1-hour 1971 2022-01-09
## 5 Carbon monoxide 1 HOUR CO 1-hour 1971 2022-01-10
## 6 Carbon monoxide 1 HOUR CO 1-hour 1971 2022-01-11
## Units.of.Measure Event.Type Observation.Count Observation.Percent
## 1 Parts per million None 8 33
## 2 Parts per million None 24 100
## 3 Parts per million None 24 100
## 4 Parts per million None 24 100
## 5 Parts per million None 24 100
## 6 Parts per million None 24 100
## Arithmetic.Mean X1st.Max.Value X1st.Max.Hour AQI Method.Code
## 1 0.312500 0.4 16 NA 93
## 2 0.225000 0.3 3 NA 93
## 3 0.275000 0.4 10 NA 93
## 4 0.254167 0.4 14 NA 93
## 5 0.275000 0.4 14 NA 93
## 6 0.387500 0.7 19 NA 93
## Method.Name Local.Site.Name
## 1 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER SF-USD
## 2 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER SF-USD
## 3 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER SF-USD
## 4 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER SF-USD
## 5 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER SF-USD
## 6 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER SF-USD
## Address State.Name County.Name City.Name CBSA.Name
## 1 4801 N Career Ave South Dakota Minnehaha Sioux Falls Sioux Falls, SD
## 2 4801 N Career Ave South Dakota Minnehaha Sioux Falls Sioux Falls, SD
## 3 4801 N Career Ave South Dakota Minnehaha Sioux Falls Sioux Falls, SD
## 4 4801 N Career Ave South Dakota Minnehaha Sioux Falls Sioux Falls, SD
## 5 4801 N Career Ave South Dakota Minnehaha Sioux Falls Sioux Falls, SD
## 6 4801 N Career Ave South Dakota Minnehaha Sioux Falls Sioux Falls, SD
## Date.of.Last.Change
## 1 2023-09-08
## 2 2023-09-08
## 3 2023-09-08
## 4 2023-09-08
## 5 2023-09-08
## 6 2023-09-08
Exercise 3
library(ggplot2)
library(ggridges)
# Create month column on data for 2022
weather_2022 <- weather_2022 %>%
mutate(Date.Local = as.Date(Date.Local, format = "%Y-%m-%d"),
month = format(Date.Local, "%B"))
# Create month column on data for 2023
weather_2023 <- weather_2023 %>%
mutate(Date.Local = as.Date(Date.Local, format = "%Y-%m-%d"),
month = format(Date.Local, "%B"))
# Set month to be a factor with unique levels
weather_2022$month <- factor(weather_2022$month, levels = unique(weather_2022$month))
weather_2023$month <- factor(weather_2023$month, levels = unique(weather_2023$month))
# Filter out specific variables for 2022
SF_ozone_2022 <- weather_2022 %>%
filter(Parameter.Name == "Ozone")
SF_CO_2022 <- weather_2022 %>%
filter(Parameter.Name == "Carbon monoxide")
SF_SO2_2022 <- weather_2022 %>%
filter(Parameter.Name == "Sulfur dioxide")
SF_Temp_2022 <- weather_2022 %>%
filter(Parameter.Name == "Outdoor Temperature")
SF_windspeed_2022 <- weather_2022 %>%
filter(Parameter.Name == "Wind Speed - Resultant")
SF_winddirec_2022 <- weather_2022 %>%
filter(Parameter.Name == "Wind Direction - Resultant")
# Filter out specific variables for 2023
SF_ozone_2023 <- weather_2023 %>%
filter(Parameter.Name == "Ozone")
SF_CO_2023 <- weather_2023 %>%
filter(Parameter.Name == "Carbon monoxide")
SF_SO2_2023 <- weather_2023 %>%
filter(Parameter.Name == "Sulfur dioxide")
SF_Temp_2023 <- weather_2023 %>%
filter(Parameter.Name == "Outdoor Temperature")
SF_windspeed_2023 <- weather_2023 %>%
filter(Parameter.Name == "Wind Speed - Resultant")
SF_winddirec_2023 <- weather_2023 %>%
filter(Parameter.Name == "Wind Direction - Resultant")
# Plot ridge line plots for each variable in 2022
ggplot(SF_ozone_2022, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Ozone Levels", option = "H") +
labs(title = "Ozone Levels in Sioux Falls, SD 2022",
x = "Ozone Levels", y = "Month") +
theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 0.00307

ggplot(SF_CO_2022, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Carbon Monoxide Levels", option = "H") +
labs(title = "Carbon Monoxide Levels in Sioux Falls, SD 2022",
x = "Carbon Monoxide Levels", y = "Month") +
theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 0.0264

ggplot(SF_SO2_2022, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Sulfur Dioxide Levels", option = "H") +
labs(title = "Sulfur Dioxide Levels in Sioux Falls, SD 2022",
x = "Sulfur Dioxide Levels", y = "Month") +
theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 0.0114

ggplot(SF_Temp_2022, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Temperature", option = "H") +
labs(title = "Temperature in Sioux Falls, SD 2022",
x = "Temperature", y = "Month") +
theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 5.44

ggplot(SF_windspeed_2022, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Wind Speeds", option = "H") +
labs(title = "Wind Speeds in Sioux Falls, SD 2022",
x = "Wind Speeds", y = "Month") +
theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 1.56

ggplot(SF_winddirec_2022, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Wind Direction", option = "H") +
labs(title = "Wind Direction in Sioux Falls, SD 2022",
x = "Wind Direction", y = "Month") +
theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 29.8

# Plot ridge line plots for each variable in 2023
ggplot(SF_ozone_2023, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Ozone Levels", option = "H") +
labs(title = "Ozone Levels in Sioux Falls, SD 2023",
x = "Ozone Levels", y = "Month") +
theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 0.00312

ggplot(SF_CO_2023, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Carbon Monoxide Levels", option = "H") +
labs(title = "Carbon Monoxide Levels in Sioux Falls, SD 2023",
x = "Carbon Monoxide Levels", y = "Month") +
theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 0.0197

ggplot(SF_SO2_2023, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Sulfur Dioxide Levels", option = "H") +
labs(title = "Sulfur Dioxide Levels in Sioux Falls, SD 2023",
x = "Sulfur Dioxide Levels", y = "Month") +
theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 0.057

ggplot(SF_Temp_2023, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Temperature", option = "H") +
labs(title = "Temperature in Sioux Falls, SD 2023",
x = "Temperature", y = "Month") +
theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 4.72

ggplot(SF_windspeed_2023, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Wind Speeds", option = "H") +
labs(title = "Wind Speeds in Sioux Falls, SD 2023",
x = "Wind Speeds", y = "Month") +
theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 1.33

ggplot(SF_winddirec_2023, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Wind Direction", option = "H") +
labs(title = "Wind Direction in Sioux Falls, SD 2023",
x = "Wind Direction", y = "Month") +
theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 35.4

Exercise 4
library(ggplot2)
# create box plot for each criteria gas in 2022
box_CO_2022 <- ggplot(SF_CO_2022, aes(x=month,y=Arithmetic.Mean, fill = month)) +
geom_boxplot() +
labs(title = "Carbon Monoxide in Sioux Falls, SD 2022", x = "Month",
y = "Carbon Monoxide Levels") +
theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
box_CO_2022

box_SO2_2022 <- ggplot(SF_SO2_2022, aes(x=month,y=Arithmetic.Mean, fill = month)) +
geom_boxplot() +
labs(title = "Sulfur Dioxide in Sioux Falls, SD 2022", x = "Month",
y = "Sulfur Dioxide Levels") +
theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
box_SO2_2022

box_Ozone_2022 <- ggplot(SF_ozone_2022, aes(x=month,y=Arithmetic.Mean, fill = month)) +
geom_boxplot() +
labs(title = "Ozone in Sioux Falls, SD 2022", x = "Month",
y = "Ozone Levels") +
theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
box_Ozone_2022

# create box plot for each criteria gas in 2023
box_CO_2023 <- ggplot(SF_CO_2023, aes(x=month,y=Arithmetic.Mean, fill = month)) +
geom_boxplot() +
labs(title = "Carbon Monoxide in Sioux Falls, SD 2023", x = "Month",
y = "Carbon Monoxide Levels") +
theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
box_CO_2023

box_SO2_2023 <- ggplot(SF_SO2_2023, aes(x=month,y=Arithmetic.Mean, fill = month)) +
geom_boxplot() +
labs(title = "Sulfur Dioxide in Sioux Falls, SD 2023", x = "Month",
y = "Sulfur Dioxide Levels") +
theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
box_SO2_2023

box_Ozone_2023 <- ggplot(SF_ozone_2023, aes(x=month,y=Arithmetic.Mean, fill = month)) +
geom_boxplot() +
labs(title = "Ozone in Sioux Falls, SD 2023", x = "Month",
y = "Ozone Levels") +
theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
box_Ozone_2023

Exercise 5
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# create interactive box plot using plotly for meteorological data in 2022
box_Temp_2022 <- ggplot(SF_Temp_2022, aes(x=month,y=Arithmetic.Mean, fill = month)) +
geom_boxplot() +
labs(title = "Temperature in Sioux Falls, SD 2022", x = "Month",
y = "Temperature") +
theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
ggplotly(box_Temp_2022)
box_WindS_2022 <- ggplot(SF_windspeed_2022, aes(x=month,y=Arithmetic.Mean, fill = month)) +
geom_boxplot() +
labs(title = "Wind Speed in Sioux Falls, SD 2022", x = "Month",
y = "Wind Speed") +
theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
ggplotly(box_WindS_2022)
box_WindD_2022 <- ggplot(SF_winddirec_2022, aes(x=month,y=Arithmetic.Mean, fill = month)) +
geom_boxplot() +
labs(title = "Wind Direction in Sioux Falls, SD 2022", x = "Month",
y = "Wind Direction") +
theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
ggplotly(box_WindD_2022)
# create interactive box plot using plotly for meteorological data in 2023
box_Temp_2023 <- ggplot(SF_Temp_2023, aes(x=month,y=Arithmetic.Mean, fill = month)) +
geom_boxplot() +
labs(title = "Temperature in Sioux Falls, SD 2023", x = "Month",
y = "Temperature") +
theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
ggplotly(box_Temp_2023)
box_WindS_2023 <- ggplot(SF_windspeed_2023, aes(x=month,y=Arithmetic.Mean, fill = month)) +
geom_boxplot() +
labs(title = "Wind Speed in Sioux Falls, SD 2023", x = "Month",
y = "Wind Speed") +
theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
ggplotly(box_WindS_2023)
box_WindD_2023 <- ggplot(SF_winddirec_2023, aes(x=month,y=Arithmetic.Mean, fill = month)) +
geom_boxplot() +
labs(title = "Wind Direction in Sioux Falls, SD 2023", x = "Month",
y = "Wind Direction") +
theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
ggplotly(box_WindD_2023)
Exercise 6
# filter and count number of days with mean temp below frost level
frost.days_2022 <- filter(SF_Temp_2022, Arithmetic.Mean < 32)
frost.days_2023 <- filter(SF_Temp_2023, Arithmetic.Mean < 32)
frost.days <- rbind(frost.days_2022,frost.days_2023)
nrow(frost.days)
## [1] 154
# filter and count number of days with mean temp above high temp
high.temp_2022 <- filter(SF_Temp_2022, Arithmetic.Mean > 90)
high.temp_2023 <- filter(SF_Temp_2023, Arithmetic.Mean > 90)
high.temp <- rbind(high.temp_2022,high.temp_2023)
nrow(high.temp)
## [1] 0
Exercise 7
# add month column to weather data
weather <- weather %>%
mutate(Date.Local = as.Date(Date.Local, format = "%Y-%m-%d"),
month = format(Date.Local, "%B"))
weather$month <- factor(weather$month, levels = unique(weather_2023$month))
# create winter subset
Winter <- subset(weather, month == "January" | month == "February" | month == "March")
# display first 10 rows of data
head(Winter)
## State.Code County.Code Site.Num Parameter.Code POC Latitude Longitude Datum
## 1 46 99 9 42101 3 43.59901 -96.78331 WGS84
## 2 46 99 9 42101 3 43.59901 -96.78331 WGS84
## 3 46 99 9 42101 3 43.59901 -96.78331 WGS84
## 4 46 99 9 42101 3 43.59901 -96.78331 WGS84
## 5 46 99 9 42101 3 43.59901 -96.78331 WGS84
## 6 46 99 9 42101 3 43.59901 -96.78331 WGS84
## Parameter.Name Sample.Duration Pollutant.Standard Date.Local
## 1 Carbon monoxide 1 HOUR CO 1-hour 1971 2022-01-06
## 2 Carbon monoxide 1 HOUR CO 1-hour 1971 2022-01-07
## 3 Carbon monoxide 1 HOUR CO 1-hour 1971 2022-01-08
## 4 Carbon monoxide 1 HOUR CO 1-hour 1971 2022-01-09
## 5 Carbon monoxide 1 HOUR CO 1-hour 1971 2022-01-10
## 6 Carbon monoxide 1 HOUR CO 1-hour 1971 2022-01-11
## Units.of.Measure Event.Type Observation.Count Observation.Percent
## 1 Parts per million None 8 33
## 2 Parts per million None 24 100
## 3 Parts per million None 24 100
## 4 Parts per million None 24 100
## 5 Parts per million None 24 100
## 6 Parts per million None 24 100
## Arithmetic.Mean X1st.Max.Value X1st.Max.Hour AQI Method.Code
## 1 0.312500 0.4 16 NA 93
## 2 0.225000 0.3 3 NA 93
## 3 0.275000 0.4 10 NA 93
## 4 0.254167 0.4 14 NA 93
## 5 0.275000 0.4 14 NA 93
## 6 0.387500 0.7 19 NA 93
## Method.Name Local.Site.Name
## 1 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER SF-USD
## 2 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER SF-USD
## 3 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER SF-USD
## 4 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER SF-USD
## 5 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER SF-USD
## 6 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER SF-USD
## Address State.Name County.Name City.Name CBSA.Name
## 1 4801 N Career Ave South Dakota Minnehaha Sioux Falls Sioux Falls, SD
## 2 4801 N Career Ave South Dakota Minnehaha Sioux Falls Sioux Falls, SD
## 3 4801 N Career Ave South Dakota Minnehaha Sioux Falls Sioux Falls, SD
## 4 4801 N Career Ave South Dakota Minnehaha Sioux Falls Sioux Falls, SD
## 5 4801 N Career Ave South Dakota Minnehaha Sioux Falls Sioux Falls, SD
## 6 4801 N Career Ave South Dakota Minnehaha Sioux Falls Sioux Falls, SD
## Date.of.Last.Change month
## 1 2023-09-08 January
## 2 2023-09-08 January
## 3 2023-09-08 January
## 4 2023-09-08 January
## 5 2023-09-08 January
## 6 2023-09-08 January
# display dimensions on the data
dim(Winter)
## [1] 1172 30
Exericse 8
# create winter windspeed subsets for each year
winter.windspeed_2022 <- subset(SF_windspeed_2022, month == "January" | month == "February" | month == "March")
winter.windspeed_2023 <- subset(SF_windspeed_2023, month == "January" | month == "February" | month == "March")
# create winter temperature subsets for each year
winter.temp_2022 <- subset(SF_Temp_2022, month == "January" | month == "February" | month == "March")
winter.temp_2023 <- subset(SF_Temp_2023, month == "January" | month == "February" | month == "March")
# calculate average daily wind speed for each year
winter.windspeed.avg_2022 <- sum(winter.windspeed_2022$Arithmetic.Mean)/nrow(winter.windspeed_2022)
winter.windspeed.avg_2023 <- sum(winter.windspeed_2023$Arithmetic.Mean)/nrow(winter.windspeed_2023)
# calculate average daily temperature for each year
winter.temp.avg_2022 <- sum(winter.temp_2022$Arithmetic.Mean)/nrow(winter.temp_2022)
winter.temp.avg_2023 <- sum(winter.temp_2023$Arithmetic.Mean)/nrow(winter.temp_2023)
Exericse 9
# create temperature difference vector for each year
temp.diff_2022 <- winter.temp_2022$Arithmetic.Mean - winter.temp.avg_2022
temp.diff_2023 <- winter.temp_2023$Arithmetic.Mean - winter.temp.avg_2023
# sort data by increasing order
temp.diff_2022 <- sort(temp.diff_2022, decreasing = FALSE)
temp.diff_2023 <- sort(temp.diff_2023, decreasing = FALSE)
# display first 10 observations for each year
temp.diff_2022[1:10]
## [1] -33.00000 -30.62500 -25.95833 -24.16667 -23.66667 -23.58333 -22.95833
## [8] -22.83333 -22.62500 -22.29167
temp.diff_2023[1:10]
## [1] -32.08971 -29.42305 -29.00638 -26.67305 -23.50638 -21.83971 -21.71471
## [8] -21.42305 -21.21471 -21.08971
Part 3
# reorganize data so the parameter names are columns with their means for each year
CO.mean_2022 <- SF_CO_2022$Arithmetic.Mean
SO2.mean_2022 <- SF_SO2_2022$Arithmetic.Mean
Ozone.mean_2022 <- SF_ozone_2022$Arithmetic.Mean
Temp.mean_2022 <- SF_Temp_2022$Arithmetic.Mean
Windsp.mean_2022 <- SF_windspeed_2022$Arithmetic.Mean
Winddir.mean_2022 <- SF_winddirec_2022$Arithmetic.Mean
max_length_2022 <- max(length(CO.mean_2022), length(SO2.mean_2022),length(Ozone.mean_2022),
length(Temp.mean_2022), length(Windsp.mean_2022),length(Winddir.mean_2022))
CO.mean_2022 <- c(CO.mean_2022, rep(NA, max_length_2022 - length(CO.mean_2022)))
SO2.mean_2022 <- c(SO2.mean_2022, rep(NA, max_length_2022 - length(SO2.mean_2022)))
Ozone.mean_2022 <- c(Ozone.mean_2022, rep(NA, max_length_2022 - length(Ozone.mean_2022)))
Temp.mean_2022 <- c(Temp.mean_2022, rep(NA, max_length_2022 - length(Temp.mean_2022)))
Windsp.mean_2022 <- c(Windsp.mean_2022, rep(NA, max_length_2022 - length(Windsp.mean_2022)))
Winddir.mean_2022 <- c(Winddir.mean_2022, rep(NA, max_length_2022 - length(Winddir.mean_2022)))
means_2022 <- cbind(CO.mean_2022,SO2.mean_2022,Ozone.mean_2022,
Temp.mean_2022,Windsp.mean_2022,Winddir.mean_2022)
colnames(means_2022) <- c("Carbon.Monoxide.2022", "Sulfur.Dioxide.2022", "Ozone.2022", "Temperature.2022",
"Wind.Speed.2022", "Wind.Direction.2022")
CO.mean_2023 <- SF_CO_2023$Arithmetic.Mean
SO2.mean_2023 <- SF_SO2_2023$Arithmetic.Mean
Ozone.mean_2023 <- SF_ozone_2023$Arithmetic.Mean
Temp.mean_2023 <- SF_Temp_2023$Arithmetic.Mean
Windsp.mean_2023 <- SF_windspeed_2023$Arithmetic.Mean
Winddir.mean_2023 <- SF_winddirec_2023$Arithmetic.Mean
max_length_2023 <- max(length(CO.mean_2023), length(SO2.mean_2023),length(Ozone.mean_2023),
length(Temp.mean_2023), length(Windsp.mean_2023),length(Winddir.mean_2023))
CO.mean_2023 <- c(CO.mean_2023, rep(NA, max_length_2023 - length(CO.mean_2023)))
SO2.mean_2023 <- c(SO2.mean_2023, rep(NA, max_length_2023 - length(SO2.mean_2023)))
Ozone.mean_2023 <- c(Ozone.mean_2023, rep(NA, max_length_2023 - length(Ozone.mean_2023)))
Temp.mean_2023 <- c(Temp.mean_2023, rep(NA, max_length_2023 - length(Temp.mean_2023)))
Windsp.mean_2023 <- c(Windsp.mean_2023, rep(NA, max_length_2023 - length(Windsp.mean_2023)))
Winddir.mean_2023 <- c(Winddir.mean_2023, rep(NA, max_length_2023 - length(Winddir.mean_2023)))
means_2023 <- cbind(CO.mean_2023,SO2.mean_2023,Ozone.mean_2023,
Temp.mean_2023,Windsp.mean_2023,Winddir.mean_2023)
colnames(means_2023) <- c("Carbon.Monoxide.2023", "Sulfur.Dioxide.2023", "Ozone.2023", "Temperature.2023",
"Wind.Speed.2023", "Wind.Direction.2023")
# combine data sets into one
weather.means <- cbind(means_2022,means_2023)
weather.means <- as.data.frame(weather.means)
Exercise 10
library(shiny)
# Define UI for application that draws a histogram
ui <- fluidPage(
# Application title
titlePanel("Weather Data in 2022 and 2023"),
# Sidebar with a slider input for number of bins
sidebarLayout(
sidebarPanel(selectInput("cid","Column", choices = colnames(weather.means)),
sliderInput("bins",
"Number of bins:",
min = 1,
max = 50,
value = 12)
),
# Show a plot of the generated distribution
mainPanel(
plotOutput("distPlot")
)
)
)
# Define server logic required to draw a histogram
server <- function(input, output) {
output$distPlot <- renderPlot({
# generate bins based on input$bins from ui.R
x <- weather.means[, input$cid]
bins <- seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE), length.out = input$bins + 1)
#create title
titl <- paste("Histogram of", input$cid, sep = " ")
# draw the histogram with the specified number of bins
hist(x, breaks = bins, col = 'lightblue', border = 'white',
xlab = input$cid,
main = titl)
})
}
# Run the application
shinyApp(ui = ui, server = server)
# URL for Shiny App
# https://129zra-eric-sell.shinyapps.io/STAT515-Final/