Part 1

Exercise 1

# create function
myfun <- function(y,m){
  x <- sample(y,m)
  s <- NULL
  d <- NULL
  # create for loop with given conditions
  for (i in x) {
    if (i <= 100 & i%%4 == 0){
      s <- c(s,i)
    }
    if (i >= 120 & i%%3 ==0){
      d <- c(d,i)
    }
  }
  # calculate desired values
  s_mean <- mean(s)
  s_var <- var(s)
  d_mean <- mean(d)
  d_var <- var(d)
  # create list
  l <- list("Selected Integers" = x,"s"=s,"s mean" = s_mean,
            "s variance" = s_var,"d"= d,"d mean"= d_mean,
            "d variance"= d_var)
  return(l)
}
  
# apply function to our vector
myfun(c(1:300),40)
## $`Selected Integers`
##  [1] 134 106 135 233 178  83  79 137 149  81 200 237 100 296  82  49  36  92 263
## [20]  13 198 250  87 231 218 191 142 109 240  17 167 133 189  24 282 153 226  44
## [39] 120 147
## 
## $s
## [1] 100  36  92  24  44
## 
## $`s mean`
## [1] 59.2
## 
## $`s variance`
## [1] 1187.2
## 
## $d
##  [1] 135 237 198 231 240 189 282 153 120 147
## 
## $`d mean`
## [1] 193.2
## 
## $`d variance`
## [1] 2884.4

Exercise 2

x <- seq(-5,40, length=500)         # x values
dist.x <- dnorm(x, mean = 5, sd = 10)    # Returns CDF of normal distribution with mean 5 and sd 10.

degf <- c(3,10,20)             # Set up the three degree of freedoms
colors <- c("green","red","blue","black")  # Set up the four colors 
labels <- c("df=3", "df=10","df=20",  "normal")       # Label the plots

plot(x,dist.x, type="l", lty=2, lwd=2, xlab="x value",
  ylab="y value", main="Comparison of Normal and Chi-square Distributions")         # Plot normal density curve

for (i in 1:3){
  lines(x, dchisq(x,degf[i]), lty=1, lwd=2, col=colors[i]) # Plot chi-square distribution curves
                                   # dchisq() returns CDF of chi-square distribution
                                   # col= set up colors for the chi-square distributions
}

legend("topright", inset=0.01, title="Distributions",       # Set up the title of legend
  labels, lwd=2, lty=c('solid','solid','solid','dashed'), col=colors)  # Set up line types appearing in the legend

Part 2

# download data
ozone_2022 <- read.csv("daily_44201_2022.csv")
ozone_2023 <- read.csv("daily_44201_2023.csv")
SO2_2022 <- read.csv("daily_42401_2022.csv")
SO2_2023 <- read.csv("daily_42401_2023.csv")
CO_2022 <- read.csv("daily_42101_2022.csv")
CO_2023 <- read.csv("daily_42101_2023.csv")
Winds_2022 <- read.csv("daily_WIND_2022.csv")
Winds_2023 <- read.csv("daily_WIND_2023.csv")
Temp_2022 <- read.csv("daily_TEMP_2022.csv")
Temp_2023 <- read.csv("daily_TEMP_2023.csv")

# combine into data sets by year
weather1_2022 <- rbind(CO_2022,ozone_2022,SO2_2022,Winds_2022,Temp_2022)
weather1_2023 <- rbind(CO_2023,ozone_2023,SO2_2023,Winds_2023,Temp_2023)
str(weather1_2022)
## 'data.frame':    1666187 obs. of  29 variables:
##  $ State.Code         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ County.Code        : int  73 73 73 73 73 73 73 73 73 73 ...
##  $ Site.Num           : int  23 23 23 23 23 23 23 23 23 23 ...
##  $ Parameter.Code     : int  42101 42101 42101 42101 42101 42101 42101 42101 42101 42101 ...
##  $ POC                : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Latitude           : num  33.6 33.6 33.6 33.6 33.6 ...
##  $ Longitude          : num  -86.8 -86.8 -86.8 -86.8 -86.8 ...
##  $ Datum              : chr  "WGS84" "WGS84" "WGS84" "WGS84" ...
##  $ Parameter.Name     : chr  "Carbon monoxide" "Carbon monoxide" "Carbon monoxide" "Carbon monoxide" ...
##  $ Sample.Duration    : chr  "1 HOUR" "1 HOUR" "1 HOUR" "1 HOUR" ...
##  $ Pollutant.Standard : chr  "CO 1-hour 1971" "CO 1-hour 1971" "CO 1-hour 1971" "CO 1-hour 1971" ...
##  $ Date.Local         : chr  "2022-01-01" "2022-01-02" "2022-01-03" "2022-01-04" ...
##  $ Units.of.Measure   : chr  "Parts per million" "Parts per million" "Parts per million" "Parts per million" ...
##  $ Event.Type         : chr  "None" "None" "None" "None" ...
##  $ Observation.Count  : int  24 24 10 9 15 24 24 24 24 10 ...
##  $ Observation.Percent: num  100 100 42 38 63 100 100 100 100 42 ...
##  $ Arithmetic.Mean    : num  0.1 0.146 0.2 0.211 0.407 ...
##  $ X1st.Max.Value     : num  0.1 0.2 0.2 0.3 0.8 0.7 0.3 0.3 0.3 0.2 ...
##  $ X1st.Max.Hour      : int  0 13 0 17 18 0 17 11 17 0 ...
##  $ AQI                : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Method.Code        : int  93 93 93 93 93 93 93 93 93 93 ...
##  $ Method.Name        : chr  "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" ...
##  $ Local.Site.Name    : chr  "North Birmingham" "North Birmingham" "North Birmingham" "North Birmingham" ...
##  $ Address            : chr  "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." ...
##  $ State.Name         : chr  "Alabama" "Alabama" "Alabama" "Alabama" ...
##  $ County.Name        : chr  "Jefferson" "Jefferson" "Jefferson" "Jefferson" ...
##  $ City.Name          : chr  "Birmingham" "Birmingham" "Birmingham" "Birmingham" ...
##  $ CBSA.Name          : chr  "Birmingham-Hoover, AL" "Birmingham-Hoover, AL" "Birmingham-Hoover, AL" "Birmingham-Hoover, AL" ...
##  $ Date.of.Last.Change: chr  "2023-04-18" "2023-04-18" "2023-04-18" "2023-04-18" ...
str(weather1_2023)
## 'data.frame':    1634762 obs. of  29 variables:
##  $ State.Code         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ County.Code        : int  73 73 73 73 73 73 73 73 73 73 ...
##  $ Site.Num           : int  23 23 23 23 23 23 23 23 23 23 ...
##  $ Parameter.Code     : int  42101 42101 42101 42101 42101 42101 42101 42101 42101 42101 ...
##  $ POC                : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Latitude           : num  33.6 33.6 33.6 33.6 33.6 ...
##  $ Longitude          : num  -86.8 -86.8 -86.8 -86.8 -86.8 ...
##  $ Datum              : chr  "WGS84" "WGS84" "WGS84" "WGS84" ...
##  $ Parameter.Name     : chr  "Carbon monoxide" "Carbon monoxide" "Carbon monoxide" "Carbon monoxide" ...
##  $ Sample.Duration    : chr  "1 HOUR" "1 HOUR" "1 HOUR" "1 HOUR" ...
##  $ Pollutant.Standard : chr  "CO 1-hour 1971" "CO 1-hour 1971" "CO 1-hour 1971" "CO 1-hour 1971" ...
##  $ Date.Local         : chr  "2023-01-01" "2023-01-02" "2023-01-03" "2023-01-04" ...
##  $ Units.of.Measure   : chr  "Parts per million" "Parts per million" "Parts per million" "Parts per million" ...
##  $ Event.Type         : chr  "None" "None" "None" "None" ...
##  $ Observation.Count  : int  24 24 24 24 13 21 24 24 24 24 ...
##  $ Observation.Percent: num  100 100 100 100 54 88 100 100 100 100 ...
##  $ Arithmetic.Mean    : num  0.154 0.121 0.117 0.229 0.277 ...
##  $ X1st.Max.Value     : num  0.4 0.2 0.2 0.8 0.6 0.5 0.6 0.2 0.4 0.8 ...
##  $ X1st.Max.Hour      : int  23 0 12 22 0 18 1 17 19 8 ...
##  $ AQI                : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Method.Code        : int  93 93 93 93 93 93 93 93 93 93 ...
##  $ Method.Name        : chr  "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" "INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER" ...
##  $ Local.Site.Name    : chr  "North Birmingham" "North Birmingham" "North Birmingham" "North Birmingham" ...
##  $ Address            : chr  "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." "NO. B'HAM,SOU R.R., 3009 28TH ST. NO." ...
##  $ State.Name         : chr  "Alabama" "Alabama" "Alabama" "Alabama" ...
##  $ County.Name        : chr  "Jefferson" "Jefferson" "Jefferson" "Jefferson" ...
##  $ City.Name          : chr  "Birmingham" "Birmingham" "Birmingham" "Birmingham" ...
##  $ CBSA.Name          : chr  "Birmingham-Hoover, AL" "Birmingham-Hoover, AL" "Birmingham-Hoover, AL" "Birmingham-Hoover, AL" ...
##  $ Date.of.Last.Change: chr  "2024-04-09" "2024-04-09" "2024-04-09" "2024-04-09" ...
# sort by SF-USD site name
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
weather_2022 <- weather1_2022 %>%
  filter(Local.Site.Name == "SF-USD")
weather_2023 <- weather1_2023 %>%
  filter(Local.Site.Name == "SF-USD")
weather <- rbind(weather_2022,weather_2023)
# check that we only have SF-USD sites
local.site_2022 <- as.factor(weather_2022$Local.Site.Name)
local.site_2023 <- as.factor(weather_2023$Local.Site.Name)
str(local.site_2022)
##  Factor w/ 1 level "SF-USD": 1 1 1 1 1 1 1 1 1 1 ...
str(local.site_2023)
##  Factor w/ 1 level "SF-USD": 1 1 1 1 1 1 1 1 1 1 ...
# check variable names
weather_2022$Parameter.Name <- as.factor(weather_2022$Parameter.Name)
weather_2023$Parameter.Name <- as.factor(weather_2023$Parameter.Name)
levels(weather_2022$Parameter.Name)
## [1] "Carbon monoxide"            "Outdoor Temperature"       
## [3] "Ozone"                      "Sulfur dioxide"            
## [5] "Wind Direction - Resultant" "Wind Speed - Resultant"
levels(weather_2023$Parameter.Name)
## [1] "Carbon monoxide"            "Outdoor Temperature"       
## [3] "Ozone"                      "Sulfur dioxide"            
## [5] "Wind Direction - Resultant" "Wind Speed - Resultant"
# show head of data
head(weather)
##   State.Code County.Code Site.Num Parameter.Code POC Latitude Longitude Datum
## 1         46          99        9          42101   3 43.59901 -96.78331 WGS84
## 2         46          99        9          42101   3 43.59901 -96.78331 WGS84
## 3         46          99        9          42101   3 43.59901 -96.78331 WGS84
## 4         46          99        9          42101   3 43.59901 -96.78331 WGS84
## 5         46          99        9          42101   3 43.59901 -96.78331 WGS84
## 6         46          99        9          42101   3 43.59901 -96.78331 WGS84
##    Parameter.Name Sample.Duration Pollutant.Standard Date.Local
## 1 Carbon monoxide          1 HOUR     CO 1-hour 1971 2022-01-06
## 2 Carbon monoxide          1 HOUR     CO 1-hour 1971 2022-01-07
## 3 Carbon monoxide          1 HOUR     CO 1-hour 1971 2022-01-08
## 4 Carbon monoxide          1 HOUR     CO 1-hour 1971 2022-01-09
## 5 Carbon monoxide          1 HOUR     CO 1-hour 1971 2022-01-10
## 6 Carbon monoxide          1 HOUR     CO 1-hour 1971 2022-01-11
##    Units.of.Measure Event.Type Observation.Count Observation.Percent
## 1 Parts per million       None                 8                  33
## 2 Parts per million       None                24                 100
## 3 Parts per million       None                24                 100
## 4 Parts per million       None                24                 100
## 5 Parts per million       None                24                 100
## 6 Parts per million       None                24                 100
##   Arithmetic.Mean X1st.Max.Value X1st.Max.Hour AQI Method.Code
## 1        0.312500            0.4            16  NA          93
## 2        0.225000            0.3             3  NA          93
## 3        0.275000            0.4            10  NA          93
## 4        0.254167            0.4            14  NA          93
## 5        0.275000            0.4            14  NA          93
## 6        0.387500            0.7            19  NA          93
##                                         Method.Name Local.Site.Name
## 1 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER          SF-USD
## 2 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER          SF-USD
## 3 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER          SF-USD
## 4 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER          SF-USD
## 5 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER          SF-USD
## 6 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER          SF-USD
##             Address   State.Name County.Name   City.Name       CBSA.Name
## 1 4801 N Career Ave South Dakota   Minnehaha Sioux Falls Sioux Falls, SD
## 2 4801 N Career Ave South Dakota   Minnehaha Sioux Falls Sioux Falls, SD
## 3 4801 N Career Ave South Dakota   Minnehaha Sioux Falls Sioux Falls, SD
## 4 4801 N Career Ave South Dakota   Minnehaha Sioux Falls Sioux Falls, SD
## 5 4801 N Career Ave South Dakota   Minnehaha Sioux Falls Sioux Falls, SD
## 6 4801 N Career Ave South Dakota   Minnehaha Sioux Falls Sioux Falls, SD
##   Date.of.Last.Change
## 1          2023-09-08
## 2          2023-09-08
## 3          2023-09-08
## 4          2023-09-08
## 5          2023-09-08
## 6          2023-09-08

Exercise 3

library(ggplot2)
library(ggridges)
# Create month column on data for 2022
weather_2022 <- weather_2022 %>%
  mutate(Date.Local = as.Date(Date.Local, format = "%Y-%m-%d"),
         month = format(Date.Local, "%B"))

# Create month column on data for 2023
weather_2023 <- weather_2023 %>%
  mutate(Date.Local = as.Date(Date.Local, format = "%Y-%m-%d"),
         month = format(Date.Local, "%B"))

# Set month to be a factor with unique levels
weather_2022$month <- factor(weather_2022$month, levels = unique(weather_2022$month))
weather_2023$month <- factor(weather_2023$month, levels = unique(weather_2023$month))

# Filter out specific variables for 2022
SF_ozone_2022 <- weather_2022 %>%
  filter(Parameter.Name == "Ozone")
SF_CO_2022 <- weather_2022 %>%
  filter(Parameter.Name == "Carbon monoxide")
SF_SO2_2022 <- weather_2022 %>%
  filter(Parameter.Name == "Sulfur dioxide")
SF_Temp_2022 <- weather_2022 %>%
  filter(Parameter.Name == "Outdoor Temperature")
SF_windspeed_2022 <- weather_2022 %>%
  filter(Parameter.Name == "Wind Speed - Resultant")
SF_winddirec_2022 <- weather_2022 %>%
  filter(Parameter.Name == "Wind Direction - Resultant")

# Filter out specific variables for 2023
SF_ozone_2023 <- weather_2023 %>%
  filter(Parameter.Name == "Ozone")
SF_CO_2023 <- weather_2023 %>%
  filter(Parameter.Name == "Carbon monoxide")
SF_SO2_2023 <- weather_2023 %>%
  filter(Parameter.Name == "Sulfur dioxide")
SF_Temp_2023 <- weather_2023 %>%
  filter(Parameter.Name == "Outdoor Temperature")
SF_windspeed_2023 <- weather_2023 %>%
  filter(Parameter.Name == "Wind Speed - Resultant")
SF_winddirec_2023 <- weather_2023 %>%
  filter(Parameter.Name == "Wind Direction - Resultant")

# Plot ridge line plots for each variable in 2022
ggplot(SF_ozone_2022, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "Ozone Levels", option = "H") +
  labs(title = "Ozone Levels in Sioux Falls, SD 2022", 
       x = "Ozone Levels", y = "Month") +
  theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 0.00307

ggplot(SF_CO_2022, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "Carbon Monoxide Levels", option = "H") +
  labs(title = "Carbon Monoxide Levels in Sioux Falls, SD 2022", 
       x = "Carbon Monoxide Levels", y = "Month") +
  theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 0.0264

ggplot(SF_SO2_2022, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "Sulfur Dioxide Levels", option = "H") +
  labs(title = "Sulfur Dioxide Levels in Sioux Falls, SD 2022", 
       x = "Sulfur Dioxide Levels", y = "Month") +
  theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 0.0114

ggplot(SF_Temp_2022, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "Temperature", option = "H") +
  labs(title = "Temperature in Sioux Falls, SD 2022", 
       x = "Temperature", y = "Month") +
  theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 5.44

ggplot(SF_windspeed_2022, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "Wind Speeds", option = "H") +
  labs(title = "Wind Speeds in Sioux Falls, SD 2022", 
       x = "Wind Speeds", y = "Month") +
  theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 1.56

ggplot(SF_winddirec_2022, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "Wind Direction", option = "H") +
  labs(title = "Wind Direction in Sioux Falls, SD 2022", 
       x = "Wind Direction", y = "Month") +
  theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 29.8

# Plot ridge line plots for each variable in 2023
ggplot(SF_ozone_2023, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "Ozone Levels", option = "H") +
  labs(title = "Ozone Levels in Sioux Falls, SD 2023", 
       x = "Ozone Levels", y = "Month") +
  theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 0.00312

ggplot(SF_CO_2023, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "Carbon Monoxide Levels", option = "H") +
  labs(title = "Carbon Monoxide Levels in Sioux Falls, SD 2023", 
       x = "Carbon Monoxide Levels", y = "Month") +
  theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 0.0197

ggplot(SF_SO2_2023, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "Sulfur Dioxide Levels", option = "H") +
  labs(title = "Sulfur Dioxide Levels in Sioux Falls, SD 2023", 
       x = "Sulfur Dioxide Levels", y = "Month") +
  theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 0.057

ggplot(SF_Temp_2023, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "Temperature", option = "H") +
  labs(title = "Temperature in Sioux Falls, SD 2023", 
       x = "Temperature", y = "Month") +
  theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 4.72

ggplot(SF_windspeed_2023, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "Wind Speeds", option = "H") +
  labs(title = "Wind Speeds in Sioux Falls, SD 2023", 
       x = "Wind Speeds", y = "Month") +
  theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 1.33

ggplot(SF_winddirec_2023, aes(x=Arithmetic.Mean, y=month, fill=stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "Wind Direction", option = "H") +
  labs(title = "Wind Direction in Sioux Falls, SD 2023", 
       x = "Wind Direction", y = "Month") +
  theme(plot.title = element_text(hjust = 0.5))
## Picking joint bandwidth of 35.4

Exercise 4

library(ggplot2)
# create box plot for each criteria gas in 2022
box_CO_2022 <- ggplot(SF_CO_2022, aes(x=month,y=Arithmetic.Mean, fill = month)) +
  geom_boxplot() +
  labs(title = "Carbon Monoxide in Sioux Falls, SD 2022", x = "Month",
       y = "Carbon Monoxide Levels") +
  theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
box_CO_2022

box_SO2_2022 <- ggplot(SF_SO2_2022, aes(x=month,y=Arithmetic.Mean, fill = month)) +
  geom_boxplot() +
  labs(title = "Sulfur Dioxide in Sioux Falls, SD 2022", x = "Month",
       y = "Sulfur Dioxide Levels") +
  theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
box_SO2_2022

box_Ozone_2022 <- ggplot(SF_ozone_2022, aes(x=month,y=Arithmetic.Mean, fill = month)) +
  geom_boxplot() +
  labs(title = "Ozone in Sioux Falls, SD 2022", x = "Month",
       y = "Ozone Levels") +
  theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
box_Ozone_2022

# create box plot for each criteria gas in 2023
box_CO_2023 <- ggplot(SF_CO_2023, aes(x=month,y=Arithmetic.Mean, fill = month)) +
  geom_boxplot() +
  labs(title = "Carbon Monoxide in Sioux Falls, SD 2023", x = "Month",
       y = "Carbon Monoxide Levels") +
  theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
box_CO_2023

box_SO2_2023 <- ggplot(SF_SO2_2023, aes(x=month,y=Arithmetic.Mean, fill = month)) +
  geom_boxplot() +
  labs(title = "Sulfur Dioxide in Sioux Falls, SD 2023", x = "Month",
       y = "Sulfur Dioxide Levels") +
  theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
box_SO2_2023

box_Ozone_2023 <- ggplot(SF_ozone_2023, aes(x=month,y=Arithmetic.Mean, fill = month)) +
  geom_boxplot() +
  labs(title = "Ozone in Sioux Falls, SD 2023", x = "Month",
       y = "Ozone Levels") +
  theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
box_Ozone_2023

Exercise 5

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# create interactive box plot using plotly for meteorological data in 2022
box_Temp_2022 <- ggplot(SF_Temp_2022, aes(x=month,y=Arithmetic.Mean, fill = month)) +
  geom_boxplot() +
  labs(title = "Temperature in Sioux Falls, SD 2022", x = "Month",
       y = "Temperature") +
  theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
ggplotly(box_Temp_2022)
box_WindS_2022 <- ggplot(SF_windspeed_2022, aes(x=month,y=Arithmetic.Mean, fill = month)) +
  geom_boxplot() +
  labs(title = "Wind Speed in Sioux Falls, SD 2022", x = "Month",
       y = "Wind Speed") +
  theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
ggplotly(box_WindS_2022)
box_WindD_2022 <- ggplot(SF_winddirec_2022, aes(x=month,y=Arithmetic.Mean, fill = month)) +
  geom_boxplot() +
  labs(title = "Wind Direction in Sioux Falls, SD 2022", x = "Month",
       y = "Wind Direction") +
  theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
ggplotly(box_WindD_2022)
# create interactive box plot using plotly for meteorological data in 2023
box_Temp_2023 <- ggplot(SF_Temp_2023, aes(x=month,y=Arithmetic.Mean, fill = month)) +
  geom_boxplot() +
  labs(title = "Temperature in Sioux Falls, SD 2023", x = "Month",
       y = "Temperature") +
  theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
ggplotly(box_Temp_2023)
box_WindS_2023 <- ggplot(SF_windspeed_2023, aes(x=month,y=Arithmetic.Mean, fill = month)) +
  geom_boxplot() +
  labs(title = "Wind Speed in Sioux Falls, SD 2023", x = "Month",
       y = "Wind Speed") +
  theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
ggplotly(box_WindS_2023)
box_WindD_2023 <- ggplot(SF_winddirec_2023, aes(x=month,y=Arithmetic.Mean, fill = month)) +
  geom_boxplot() +
  labs(title = "Wind Direction in Sioux Falls, SD 2023", x = "Month",
       y = "Wind Direction") +
  theme(legend.position = "right", plot.title = element_text(hjust = 0.5))
ggplotly(box_WindD_2023)

Exercise 6

# filter and count number of days with mean temp below frost level
frost.days_2022 <- filter(SF_Temp_2022, Arithmetic.Mean < 32)
frost.days_2023 <- filter(SF_Temp_2023, Arithmetic.Mean < 32)
frost.days <- rbind(frost.days_2022,frost.days_2023)
nrow(frost.days)
## [1] 154
# filter and count number of days with mean temp above high temp
high.temp_2022 <- filter(SF_Temp_2022, Arithmetic.Mean > 90)
high.temp_2023 <- filter(SF_Temp_2023, Arithmetic.Mean > 90)
high.temp <- rbind(high.temp_2022,high.temp_2023)
nrow(high.temp)
## [1] 0

Exercise 7

# add month column to weather data
weather <- weather %>%
  mutate(Date.Local = as.Date(Date.Local, format = "%Y-%m-%d"),
         month = format(Date.Local, "%B"))
weather$month <- factor(weather$month, levels = unique(weather_2023$month))
# create winter subset
Winter <- subset(weather, month == "January" | month == "February" | month == "March")

# display first 10 rows of data
head(Winter)
##   State.Code County.Code Site.Num Parameter.Code POC Latitude Longitude Datum
## 1         46          99        9          42101   3 43.59901 -96.78331 WGS84
## 2         46          99        9          42101   3 43.59901 -96.78331 WGS84
## 3         46          99        9          42101   3 43.59901 -96.78331 WGS84
## 4         46          99        9          42101   3 43.59901 -96.78331 WGS84
## 5         46          99        9          42101   3 43.59901 -96.78331 WGS84
## 6         46          99        9          42101   3 43.59901 -96.78331 WGS84
##    Parameter.Name Sample.Duration Pollutant.Standard Date.Local
## 1 Carbon monoxide          1 HOUR     CO 1-hour 1971 2022-01-06
## 2 Carbon monoxide          1 HOUR     CO 1-hour 1971 2022-01-07
## 3 Carbon monoxide          1 HOUR     CO 1-hour 1971 2022-01-08
## 4 Carbon monoxide          1 HOUR     CO 1-hour 1971 2022-01-09
## 5 Carbon monoxide          1 HOUR     CO 1-hour 1971 2022-01-10
## 6 Carbon monoxide          1 HOUR     CO 1-hour 1971 2022-01-11
##    Units.of.Measure Event.Type Observation.Count Observation.Percent
## 1 Parts per million       None                 8                  33
## 2 Parts per million       None                24                 100
## 3 Parts per million       None                24                 100
## 4 Parts per million       None                24                 100
## 5 Parts per million       None                24                 100
## 6 Parts per million       None                24                 100
##   Arithmetic.Mean X1st.Max.Value X1st.Max.Hour AQI Method.Code
## 1        0.312500            0.4            16  NA          93
## 2        0.225000            0.3             3  NA          93
## 3        0.275000            0.4            10  NA          93
## 4        0.254167            0.4            14  NA          93
## 5        0.275000            0.4            14  NA          93
## 6        0.387500            0.7            19  NA          93
##                                         Method.Name Local.Site.Name
## 1 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER          SF-USD
## 2 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER          SF-USD
## 3 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER          SF-USD
## 4 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER          SF-USD
## 5 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER          SF-USD
## 6 INSTRUMENTAL - GAS FILTER CORRELATION CO ANALYZER          SF-USD
##             Address   State.Name County.Name   City.Name       CBSA.Name
## 1 4801 N Career Ave South Dakota   Minnehaha Sioux Falls Sioux Falls, SD
## 2 4801 N Career Ave South Dakota   Minnehaha Sioux Falls Sioux Falls, SD
## 3 4801 N Career Ave South Dakota   Minnehaha Sioux Falls Sioux Falls, SD
## 4 4801 N Career Ave South Dakota   Minnehaha Sioux Falls Sioux Falls, SD
## 5 4801 N Career Ave South Dakota   Minnehaha Sioux Falls Sioux Falls, SD
## 6 4801 N Career Ave South Dakota   Minnehaha Sioux Falls Sioux Falls, SD
##   Date.of.Last.Change   month
## 1          2023-09-08 January
## 2          2023-09-08 January
## 3          2023-09-08 January
## 4          2023-09-08 January
## 5          2023-09-08 January
## 6          2023-09-08 January
# display dimensions on the data
dim(Winter)
## [1] 1172   30

Exericse 8

# create winter windspeed subsets for each year
winter.windspeed_2022 <- subset(SF_windspeed_2022, month == "January" | month == "February" | month == "March")
winter.windspeed_2023 <- subset(SF_windspeed_2023, month == "January" | month == "February" | month == "March")

# create winter temperature subsets for each year
winter.temp_2022 <- subset(SF_Temp_2022, month == "January" | month == "February" | month == "March")
winter.temp_2023 <- subset(SF_Temp_2023, month == "January" | month == "February" | month == "March")

# calculate average daily wind speed for each year
winter.windspeed.avg_2022 <- sum(winter.windspeed_2022$Arithmetic.Mean)/nrow(winter.windspeed_2022)
winter.windspeed.avg_2023 <- sum(winter.windspeed_2023$Arithmetic.Mean)/nrow(winter.windspeed_2023)

# calculate average daily temperature for each year
winter.temp.avg_2022 <- sum(winter.temp_2022$Arithmetic.Mean)/nrow(winter.temp_2022)
winter.temp.avg_2023 <- sum(winter.temp_2023$Arithmetic.Mean)/nrow(winter.temp_2023)

Exericse 9

# create temperature difference vector for each year
temp.diff_2022 <- winter.temp_2022$Arithmetic.Mean - winter.temp.avg_2022
temp.diff_2023 <- winter.temp_2023$Arithmetic.Mean - winter.temp.avg_2023

# sort data by increasing order
temp.diff_2022 <- sort(temp.diff_2022, decreasing = FALSE)
temp.diff_2023 <- sort(temp.diff_2023, decreasing = FALSE)

# display first 10 observations for each year
temp.diff_2022[1:10]
##  [1] -33.00000 -30.62500 -25.95833 -24.16667 -23.66667 -23.58333 -22.95833
##  [8] -22.83333 -22.62500 -22.29167
temp.diff_2023[1:10]
##  [1] -32.08971 -29.42305 -29.00638 -26.67305 -23.50638 -21.83971 -21.71471
##  [8] -21.42305 -21.21471 -21.08971

Part 3

# reorganize data so the parameter names are columns with their means for each year
CO.mean_2022 <- SF_CO_2022$Arithmetic.Mean
SO2.mean_2022 <- SF_SO2_2022$Arithmetic.Mean
Ozone.mean_2022 <- SF_ozone_2022$Arithmetic.Mean
Temp.mean_2022 <- SF_Temp_2022$Arithmetic.Mean
Windsp.mean_2022 <- SF_windspeed_2022$Arithmetic.Mean
Winddir.mean_2022 <- SF_winddirec_2022$Arithmetic.Mean

max_length_2022 <- max(length(CO.mean_2022), length(SO2.mean_2022),length(Ozone.mean_2022),
                       length(Temp.mean_2022), length(Windsp.mean_2022),length(Winddir.mean_2022))

CO.mean_2022 <- c(CO.mean_2022, rep(NA, max_length_2022 - length(CO.mean_2022)))
SO2.mean_2022 <- c(SO2.mean_2022, rep(NA, max_length_2022 - length(SO2.mean_2022)))
Ozone.mean_2022 <- c(Ozone.mean_2022, rep(NA, max_length_2022 - length(Ozone.mean_2022)))
Temp.mean_2022 <- c(Temp.mean_2022, rep(NA, max_length_2022 - length(Temp.mean_2022)))
Windsp.mean_2022 <- c(Windsp.mean_2022, rep(NA, max_length_2022 - length(Windsp.mean_2022)))
Winddir.mean_2022 <- c(Winddir.mean_2022, rep(NA, max_length_2022 - length(Winddir.mean_2022)))

means_2022 <- cbind(CO.mean_2022,SO2.mean_2022,Ozone.mean_2022,
                    Temp.mean_2022,Windsp.mean_2022,Winddir.mean_2022)
colnames(means_2022) <- c("Carbon.Monoxide.2022", "Sulfur.Dioxide.2022", "Ozone.2022", "Temperature.2022", 
                          "Wind.Speed.2022", "Wind.Direction.2022")

CO.mean_2023 <- SF_CO_2023$Arithmetic.Mean
SO2.mean_2023 <- SF_SO2_2023$Arithmetic.Mean
Ozone.mean_2023 <- SF_ozone_2023$Arithmetic.Mean
Temp.mean_2023 <- SF_Temp_2023$Arithmetic.Mean
Windsp.mean_2023 <- SF_windspeed_2023$Arithmetic.Mean
Winddir.mean_2023 <- SF_winddirec_2023$Arithmetic.Mean

max_length_2023 <- max(length(CO.mean_2023), length(SO2.mean_2023),length(Ozone.mean_2023),
                       length(Temp.mean_2023), length(Windsp.mean_2023),length(Winddir.mean_2023))

CO.mean_2023 <- c(CO.mean_2023, rep(NA, max_length_2023 - length(CO.mean_2023)))
SO2.mean_2023 <- c(SO2.mean_2023, rep(NA, max_length_2023 - length(SO2.mean_2023)))
Ozone.mean_2023 <- c(Ozone.mean_2023, rep(NA, max_length_2023 - length(Ozone.mean_2023)))
Temp.mean_2023 <- c(Temp.mean_2023, rep(NA, max_length_2023 - length(Temp.mean_2023)))
Windsp.mean_2023 <- c(Windsp.mean_2023, rep(NA, max_length_2023 - length(Windsp.mean_2023)))
Winddir.mean_2023 <- c(Winddir.mean_2023, rep(NA, max_length_2023 - length(Winddir.mean_2023)))

means_2023 <- cbind(CO.mean_2023,SO2.mean_2023,Ozone.mean_2023,
                    Temp.mean_2023,Windsp.mean_2023,Winddir.mean_2023)
colnames(means_2023) <- c("Carbon.Monoxide.2023", "Sulfur.Dioxide.2023", "Ozone.2023", "Temperature.2023", 
                          "Wind.Speed.2023", "Wind.Direction.2023")

# combine data sets into one
weather.means <- cbind(means_2022,means_2023)
weather.means <- as.data.frame(weather.means)

Exercise 10

library(shiny)

# Define UI for application that draws a histogram
ui <- fluidPage(

    # Application title
    titlePanel("Weather Data in 2022 and 2023"),

    # Sidebar with a slider input for number of bins 
    sidebarLayout(
        sidebarPanel(selectInput("cid","Column", choices = colnames(weather.means)),
            sliderInput("bins",
                        "Number of bins:",
                        min = 1,
                        max = 50,
                        value = 12)
        ),

        # Show a plot of the generated distribution
        mainPanel(
           plotOutput("distPlot")
        )
    )
)


# Define server logic required to draw a histogram
server <- function(input, output) {

    output$distPlot <- renderPlot({
        # generate bins based on input$bins from ui.R
        x    <- weather.means[, input$cid]
        bins <- seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE), length.out = input$bins + 1)
        
        #create title
        titl <- paste("Histogram of", input$cid, sep = " ")
        # draw the histogram with the specified number of bins
        hist(x, breaks = bins, col = 'lightblue', border = 'white',
             xlab = input$cid,
             main = titl)
    })
}

# Run the application 
shinyApp(ui = ui, server = server)

# URL for Shiny App
# https://129zra-eric-sell.shinyapps.io/STAT515-Final/