Course: ENVS 203-001 Environmental Statistics

Data Check

# Libraries
library(DescTools)
library(Stat2Data)
library(plyr)
library(ggplot2)

# File Import
# Read the Ohare dataset CSV from the working directory into a data frame.
# `TRUE` is written out in full because `T` is an ordinary variable that can
# be reassigned, which would silently change the meaning of this call.
OhareData <- read.csv(
  "ENVS203_Collins_Ethan_HW07_OhareDataset_23march2026.csv",
  stringsAsFactors = TRUE
)

# Data Check
# Print per-column summary statistics (quartiles, mean, and NA counts) so the
# imported values can be checked for plausible ranges and missing data.
summary(OhareData)
##       year        prcpMax_in     snowMax_in       snwdMax_in    
##  Min.   :1960   Min.   :1.34   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:1976   1st Qu.:1.98   1st Qu.: 4.200   1st Qu.: 5.700  
##  Median :1992   Median :2.55   Median : 6.000   Median : 8.000  
##  Mean   :1992   Mean   :2.79   Mean   : 6.764   Mean   : 9.012  
##  3rd Qu.:2009   3rd Qu.:3.43   3rd Qu.: 8.475   3rd Qu.:11.000  
##  Max.   :2025   Max.   :6.86   Max.   :18.600   Max.   :28.000  
##                 NA's   :1                       NA's   :2       
##    tmaxMax_F        tminMax_F        tmaxAvg_F       tminAvg_F    
##  Min.   : 91.00   Min.   :-27.00   Min.   :56.06   Min.   :36.16  
##  1st Qu.: 94.00   1st Qu.:-15.75   1st Qu.:57.91   1st Qu.:38.99  
##  Median : 95.50   Median :-10.00   Median :58.99   Median :40.75  
##  Mean   : 96.38   Mean   :-10.88   Mean   :59.19   Mean   :40.90  
##  3rd Qu.: 99.00   3rd Qu.: -7.00   3rd Qu.:60.20   3rd Qu.:42.39  
##  Max.   :104.00   Max.   :  5.00   Max.   :63.92   Max.   :46.46  
## 
# Show the dimensions of the data frame and the storage type of each column.
str(OhareData)
## 'data.frame':    66 obs. of  8 variables:
##  $ year      : int  1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
##  $ prcpMax_in: num  2 2.88 2.84 1.98 2.33 1.77 2.69 3.09 3.43 4.25 ...
##  $ snowMax_in: num  5.8 7.4 8.6 8.2 7.4 6.8 8.1 13.4 3.8 9.9 ...
##  $ snwdMax_in: num  11 8 10 8 7 10 7 27 5 10 ...
##  $ tmaxMax_F : int  99 93 94 95 96 94 97 92 96 96 ...
##  $ tminMax_F : int  -17 -8 -15 -19 -13 -14 -20 -17 -13 -8 ...
##  $ tmaxAvg_F : num  57.2 58.3 57.9 58.3 59.8 ...
##  $ tminAvg_F : num  40.5 38.7 38.5 36.2 37.9 ...



Fahrenheit to Celsius Conversion

# Add Celsius versions of the four Fahrenheit temperature columns.
# transform() evaluates its expressions inside the data frame it is given, so
# the columns are referenced by bare name rather than by reaching back to the
# global OhareData object. The conversion itself, (F - 32) * 5/9, is unchanged.
convertOhareData <- transform(
  OhareData,
  tmaxMax_C = (tmaxMax_F - 32) * (5 / 9),
  tminMax_C = (tminMax_F - 32) * (5 / 9),
  tmaxAvg_C = (tmaxAvg_F - 32) * (5 / 9),
  tminAvg_C = (tminAvg_F - 32) * (5 / 9)
)



Categorizing by Time in Decades

# Label every record with the decade its year falls in ("1960s", "1970s", ...).
# The label is derived from the year itself instead of one hard-coded range per
# decade, so the new column always has exactly one entry per row and years
# outside 1960-2029 are labelled as well. A missing year gives a missing label.
decadesOhareData <- within(convertOhareData, {
  decade <- ifelse(
    is.na(year),
    NA_character_,
    # Integer-divide by 10 to drop the final digit, then scale back up.
    paste0((year %/% 10) * 10, "s")
  )
})



Summary of Average Annual Highest Daily Temperature (C)

# Per-decade descriptive statistics for tmaxAvg_C: count of non-missing
# values, mean, standard deviation, standard error (sd / sqrt(count)), and the
# standard error scaled by 1.96 as the 95% interval half-width.
# The later columns are built from the earlier ones within the same call.
sumOhareData01 <- plyr::ddply(
  .data = decadesOhareData,
  .variables = "decade",
  .fun = plyr::summarize,
  counttmaxAvg_C = sum(!is.na(tmaxAvg_C)),
  meantmaxAvg_C = mean(tmaxAvg_C, na.rm = TRUE),
  sdtmaxAvg_C = sd(tmaxAvg_C, na.rm = TRUE),
  setmaxAvg_C = sdtmaxAvg_C / sqrt(counttmaxAvg_C),
  CI95tmaxAvg_C = 1.96 * setmaxAvg_C
)

# Display the summary table.
sumOhareData01
##   decade counttmaxAvg_C meantmaxAvg_C sdtmaxAvg_C setmaxAvg_C CI95tmaxAvg_C
## 1  1960s             10      14.61008   0.5296523   0.1674908     0.3282819
## 2  1970s             10      14.83865   0.8081171   0.2555491     0.5008761
## 3  1980s             10      14.82960   0.7504866   0.2373247     0.4651564
## 4  1990s             10      15.09043   1.0895146   0.3445348     0.6752881
## 5  2000s             10      15.09840   0.6432253   0.2034057     0.3986751
## 6  2010s             10      15.33399   1.1446605   0.3619734     0.7094679
## 7  2020s              6      16.48015   0.7171727   0.2927845     0.5738577



Boxplot of Average Annual Highest Daily Temperature (C)

# Box plot of tmaxAvg_C with one box per decade label.
ggplot(data = decadesOhareData, mapping = aes(decade, tmaxAvg_C)) +
  geom_boxplot() +
  ggtitle("Boxplot of Average Annual Highest Daily Temperature by Decade") +
  xlab("Decade") +
  ylab("Average Annual Highest Daily Temperature (C)")



Bar Graph of Mean ± SD for Average Annual Highest Daily Temperature (C) by Decade

# Column chart of the per-decade means, with error bars reaching one standard
# deviation above and below each mean.
ggplot(data = sumOhareData01, mapping = aes(decade, meantmaxAvg_C)) +
  geom_col(fill = "red") +
  geom_errorbar(
    mapping = aes(
      ymin = meantmaxAvg_C - sdtmaxAvg_C,
      ymax = meantmaxAvg_C + sdtmaxAvg_C
    ),
    width = 0.2
  ) +
  ggtitle("Mean ± SD of Average Annual Highest Daily Temperature by Decade") +
  xlab("Decade") +
  ylab("Mean TMAX_AVG (C)")



Point Graph of Mean ± SE for Average Annual Highest Daily Temperature (C) by Decade

# Point-and-line chart of the per-decade means, with error bars reaching one
# standard error above and below each mean.
# group = 1 places all rows in a single group so geom_line() joins the points
# across the decade labels.
ggplot(sumOhareData01, aes(x = decade, y = meantmaxAvg_C, group = 1)) +
  geom_point(size = 3) +
  geom_line() +
  geom_errorbar(
    aes(
      ymin = meantmaxAvg_C - setmaxAvg_C,
      ymax = meantmaxAvg_C + setmaxAvg_C
    ),
    width = 0.2
  ) +
  labs(
    title = "Mean ± SE of Average Annual Highest Daily Temperature by Decade",
    x = "Decade",
    # Axis label carries the unit and matches the Mean ± SD bar chart instead
    # of showing the raw column name.
    y = "Mean TMAX_AVG (C)"
  )