Course: ENVS 203-001 Environmental Statistics
Data Check
# Libraries
library(DescTools)
library(Stat2Data)
library(plyr)
library(ggplot2)
# File Import
# Read the dataset from a CSV file; the relative path is resolved against the
# current working directory.
# stringsAsFactors is spelled out as TRUE: the shorthand `T` is an ordinary
# variable that can be reassigned, whereas TRUE is a reserved word.
OhareData <- read.csv(
  "ENVS203_Collins_Ethan_HW07_OhareDataset_23march2026.csv",
  stringsAsFactors = TRUE
)
# Data Check
# Print per-column summary statistics (min, quartiles, mean, max, NA counts)
# for the imported data so value ranges and missing values can be inspected.
summary(OhareData)
## year prcpMax_in snowMax_in snwdMax_in
## Min. :1960 Min. :1.34 Min. : 0.000 Min. : 0.000
## 1st Qu.:1976 1st Qu.:1.98 1st Qu.: 4.200 1st Qu.: 5.700
## Median :1992 Median :2.55 Median : 6.000 Median : 8.000
## Mean :1992 Mean :2.79 Mean : 6.764 Mean : 9.012
## 3rd Qu.:2009 3rd Qu.:3.43 3rd Qu.: 8.475 3rd Qu.:11.000
## Max. :2025 Max. :6.86 Max. :18.600 Max. :28.000
## NA's :1 NA's :2
## tmaxMax_F tminMax_F tmaxAvg_F tminAvg_F
## Min. : 91.00 Min. :-27.00 Min. :56.06 Min. :36.16
## 1st Qu.: 94.00 1st Qu.:-15.75 1st Qu.:57.91 1st Qu.:38.99
## Median : 95.50 Median :-10.00 Median :58.99 Median :40.75
## Mean : 96.38 Mean :-10.88 Mean :59.19 Mean :40.90
## 3rd Qu.: 99.00 3rd Qu.: -7.00 3rd Qu.:60.20 3rd Qu.:42.39
## Max. :104.00 Max. : 5.00 Max. :63.92 Max. :46.46
##
# Print the structure of the data frame: number of observations and variables,
# plus each column's name, type, and first few values.
str(OhareData)
## 'data.frame': 66 obs. of 8 variables:
## $ year : int 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
## $ prcpMax_in: num 2 2.88 2.84 1.98 2.33 1.77 2.69 3.09 3.43 4.25 ...
## $ snowMax_in: num 5.8 7.4 8.6 8.2 7.4 6.8 8.1 13.4 3.8 9.9 ...
## $ snwdMax_in: num 11 8 10 8 7 10 7 27 5 10 ...
## $ tmaxMax_F : int 99 93 94 95 96 94 97 92 96 96 ...
## $ tminMax_F : int -17 -8 -15 -19 -13 -14 -20 -17 -13 -8 ...
## $ tmaxAvg_F : num 57.2 58.3 57.9 58.3 59.8 ...
## $ tminAvg_F : num 40.5 38.7 38.5 36.2 37.9 ...
Fahrenheit to Celsius Conversion
# Build a copy of OhareData with four extra columns holding the Celsius
# equivalents of the Fahrenheit temperature columns (suffix _F -> _C).
# local() keeps the helper and the working copy out of the global environment.
convertOhareData <- local({
  # Fahrenheit -> Celsius: subtract 32, then scale by 5/9.
  to_celsius <- function(deg_f) {
    (deg_f - 32) * (5/9)
  }

  converted <- OhareData
  # Columns are appended in this order, after the original columns.
  converted$tmaxMax_C <- to_celsius(OhareData$tmaxMax_F)
  converted$tminMax_C <- to_celsius(OhareData$tminMax_F)
  converted$tmaxAvg_C <- to_celsius(OhareData$tmaxAvg_F)
  converted$tminAvg_C <- to_celsius(OhareData$tminAvg_F)
  converted
})
Categorizing by Time in Decades
# Add a character column `decade` ("1960s" ... "2020s") derived from `year`.
# Bins are left-closed, right-open: [1960, 1970), [1970, 1980), ...,
# [2020, 2030). Years outside 1960-2029 (or missing) get NA.
decadesOhareData <- convertOhareData
decadesOhareData$decade <- as.character(
  cut(
    decadesOhareData$year,
    breaks = seq(1960, 2030, by = 10),
    labels = paste0(seq(1960, 2020, by = 10), "s"),
    right = FALSE
  )
)
Summary of Average Annual Highest Daily Temperature
(C)
# Per-decade summary of tmaxAvg_C: one output row per value of `decade`.
#   counttmaxAvg_C  number of non-missing observations
#   meantmaxAvg_C   mean, ignoring NA
#   sdtmaxAvg_C     standard deviation, ignoring NA
#   setmaxAvg_C     standard error = sd / sqrt(count)
#   CI95tmaxAvg_C   1.96 times the standard error
# Later columns refer to earlier ones, so the order of these arguments matters.
# NOTE(review): each decade has at most 10 observations; a t critical value
# (qt(0.975, count - 1)) may be preferable to 1.96 -- confirm the convention
# expected for this assignment before changing.
sumOhareData01 <- plyr::ddply(
  .data = decadesOhareData,
  .variables = "decade",
  .fun = plyr::summarise,
  counttmaxAvg_C = sum(!is.na(tmaxAvg_C)),
  meantmaxAvg_C = mean(tmaxAvg_C, na.rm = TRUE),
  sdtmaxAvg_C = sd(tmaxAvg_C, na.rm = TRUE),
  setmaxAvg_C = sdtmaxAvg_C / sqrt(counttmaxAvg_C),
  CI95tmaxAvg_C = 1.96 * setmaxAvg_C
)
# Display the summary table.
sumOhareData01
## decade counttmaxAvg_C meantmaxAvg_C sdtmaxAvg_C setmaxAvg_C CI95tmaxAvg_C
## 1 1960s 10 14.61008 0.5296523 0.1674908 0.3282819
## 2 1970s 10 14.83865 0.8081171 0.2555491 0.5008761
## 3 1980s 10 14.82960 0.7504866 0.2373247 0.4651564
## 4 1990s 10 15.09043 1.0895146 0.3445348 0.6752881
## 5 2000s 10 15.09840 0.6432253 0.2034057 0.3986751
## 6 2010s 10 15.33399 1.1446605 0.3619734 0.7094679
## 7 2020s 6 16.48015 0.7171727 0.2927845 0.5738577
Boxplot of Average Annual Highest Daily Temperature
(C)
# Boxplot of the yearly tmaxAvg_C values, one box per decade.
ggplot(data = decadesOhareData, mapping = aes(decade, tmaxAvg_C)) +
  geom_boxplot() +
  ggtitle("Boxplot of Average Annual Highest Daily Temperature by Decade") +
  xlab("Decade") +
  ylab("Average Annual Highest Daily Temperature (C)")
Bar Graph of Mean ± SD for Average Annual Highest Daily
Temperature (C) by Decade
# Bar chart of the per-decade mean of tmaxAvg_C; the error bars extend one
# standard deviation above and below each mean.
ggplot(
  data = sumOhareData01,
  mapping = aes(x = decade, y = meantmaxAvg_C)
) +
  geom_col(fill = "red") +
  geom_errorbar(
    mapping = aes(
      ymin = meantmaxAvg_C - sdtmaxAvg_C,
      ymax = meantmaxAvg_C + sdtmaxAvg_C
    ),
    width = 0.2
  ) +
  ggtitle("Mean ± SD of Average Annual Highest Daily Temperature by Decade") +
  xlab("Decade") +
  ylab("Mean TMAX_AVG (C)")
Point Graph of Mean ± SE for Average Annual Highest
Daily Temperature (C) by Decade
# Point-and-line chart of the per-decade mean of tmaxAvg_C; the error bars
# extend one standard error above and below each mean.
# group = 1 places all decades in a single group so geom_line() connects the
# points across the discrete x axis.
ggplot(
  data = sumOhareData01,
  mapping = aes(x = decade, y = meantmaxAvg_C, group = 1)
) +
  geom_point(size = 3) +
  geom_line() +
  geom_errorbar(
    mapping = aes(
      ymin = meantmaxAvg_C - setmaxAvg_C,
      ymax = meantmaxAvg_C + setmaxAvg_C
    ),
    width = 0.2
  ) +
  ggtitle("Mean ± SE of Average Annual Highest Daily Temperature by Decade") +
  xlab("Decade") +
  ylab("Mean tmaxAvg_C")