Course: ENVS 203-001 Environmental Statistics
Data Check
# Libraries ==============================================================
library(DescTools)
library(Stat2Data)
library(ggplot2)
library(RColorBrewer)
# Data Import ============================================================
# Read the O'Hare temperature dataset (path is relative to the working
# directory). Strings are converted to factors so that DECADE can be used as
# the grouping variable in the formula-based plots and tests.
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
OhareData <- read.csv("ENVS203_HW08_OhareDataset_24march2026.csv",
                      stringsAsFactors = TRUE)
# Data Check =============================================================
# Print per-column summaries as a quick sanity check of the import.
summary(OhareData)
## YEAR DECADE TMAX_MAX TMIN_MAX TMAX_AVG
## Min. :1960 1960s:10 Min. : 91.00 Min. :-23.00 Min. :56.58
## 1st Qu.:1965 2010s:10 1st Qu.: 93.75 1st Qu.:-16.25 1st Qu.:57.93
## Median :1990 Median : 95.00 Median :-13.00 Median :58.57
## Mean :1990 Mean : 95.35 Mean :-11.50 Mean :58.95
## 3rd Qu.:2014 3rd Qu.: 96.25 3rd Qu.: -8.00 3rd Qu.:59.81
## Max. :2019 Max. :103.00 Max. : 5.00 Max. :63.92
## TMIN_AVG
## Min. :36.16
## 1st Qu.:38.64
## Median :40.61
## Mean :40.56
## 3rd Qu.:42.63
## Max. :45.13
Two-Tailed t-test for Two Samples
# Two-Tailed t-test for Two Samples ========================================
# Ho: mean TMAX_MAX is the same in both decades
# Ha: mean TMAX_MAX differs between the decades
# Box plot ---------------------------------------------------------------
# Base-graphics first look at the distribution of TMAX_MAX within each DECADE.
boxplot(
  TMAX_MAX ~ DECADE,
  data = OhareData,
  xlab = "Decade",
  ylab = "Highest Annual Temperature (F)",
  main = "Boxplot for Highest Annual Temperature (F) by Decade"
)
# Dot plot variance assessment -----------------------------------------
# Base plot object: DECADE on the x-axis, TMAX_MAX on the y-axis.
graphOhare01 <- ggplot(OhareData, aes(x = DECADE, y = TMAX_MAX))
# Bin and stack the dots along the y-axis, centred within each decade, so the
# spread of the two groups can be compared by eye.
graphOhare01 +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.5) +
  labs(
    title = "Highest Annual Temperature (F) by Decade",
    x = "Decade",
    y = "Highest Annual Temperature (F)"
  ) +
  theme_classic()
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
# Normality test -----------------------------------------------
# Normal Q-Q plot with a reference line; points lying close to the line are
# consistent with normally distributed data.
qqnorm(OhareData[["TMAX_MAX"]])
qqline(OhareData[["TMAX_MAX"]])
# Shapiro test --------------------------------------------------
# Ho: Data are normal
# Ha: Data are not normal
# Decision rule: reject Ho when p-value <= 0.05.
# Result: p-value = 0.2222, so fail to reject Ho; no evidence against
# normality.
# NOTE(review): the test is run on TMAX_MAX pooled over both decades; the
# t-test assumption concerns each group, so consider checking per decade.
# `$` is kept here because the call text appears in the printed "data:" line.
shapiro.test(OhareData$TMAX_MAX)
##
## Shapiro-Wilk normality test
##
## data: OhareData$TMAX_MAX
## W = 0.93826, p-value = 0.2222
# Variance assessment --------------------
# F test comparing the variance of TMAX_MAX between the two decades.
# Ho: Variances are equal (ratio of variances = 1)
# Ha: Variances are not equal
# Result: p-value = 0.1182, so fail to reject Ho; the variances are treated
# as equal in the t-test.
var.test(
  TMAX_MAX ~ DECADE,
  data = OhareData,
  alternative = "two.sided"
)
##
## F test to compare two variances
##
## data: TMAX_MAX by DECADE
## F = 0.33422, num df = 9, denom df = 9, p-value = 0.1182
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.08301607 1.34557671
## sample estimates:
## ratio of variances
## 0.3342222
# Two-tailed t-test -----------------------
# Pooled-variance (var.equal = TRUE) two-sample t-test of TMAX_MAX by decade.
# Ho: Means are equal
# Ha: Means are not equal
# Result: p-value = 0.8189, so fail to reject Ho; no evidence that the decade
# means differ.
t.test(
  TMAX_MAX ~ DECADE,
  data = OhareData,
  alternative = "two.sided",
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: TMAX_MAX by DECADE
## t = -0.2323, df = 18, p-value = 0.8189
## alternative hypothesis: true difference in means between group 1960s and group 2010s is not equal to 0
## 95 percent confidence interval:
## -3.013183 2.413183
## sample estimates:
## mean in group 1960s mean in group 2010s
## 95.2 95.5
# Boxplot for TMAX_MAX by Decade
# ggplot2 version of the box plot, with each decade's box filled by DECADE.
boxOhare01 <- ggplot(
  OhareData,
  aes(x = DECADE, y = TMAX_MAX, fill = DECADE)
) +
  geom_boxplot() +
  labs(
    title = "Boxplot for Highest Annual Temperature (F) by Decade",
    x = "Decade",
    y = "Highest Annual Temperature (F)"
  ) +
  theme_classic()
# Evaluating the object at top level draws the figure.
boxOhare01
Interpretation of Results: The mean highest annual temperature (F) did not
differ significantly between the 1960s (95.2) and the 2010s (95.5)
(two-sample t-test, t = -0.2323, df = 18, p-value = 0.8189).
# One-Tailed t-test for Two Samples ==========================================
# Ho: Mean TMIN_AVG in the 1960s is greater than or equal to the mean in the 2010s
# Ha: Mean TMIN_AVG in the 1960s is less than the mean in the 2010s
# Box plot ---------------------------------------------------------------
# Base-graphics first look at the distribution of TMIN_AVG within each DECADE.
# The y-axis label carries the "(F)" unit so it agrees with the title and
# with the other TMIN_AVG plots.
boxplot(TMIN_AVG ~ DECADE, data = OhareData,
        main = "Average Annual Lowest Daily Temperature (F) by Decade",
        xlab = "Decade",
        ylab = "Average Annual Lowest Daily Temperature (F)")
# Dot plot variance assessment -----------------------------------------
# Base plot object: DECADE on the x-axis, TMIN_AVG on the y-axis.
graphOhare02 <- ggplot(OhareData, aes(x = DECADE, y = TMIN_AVG))
# Bin and stack the dots along the y-axis, centred within each decade, so the
# spread of the two groups can be compared by eye.
graphOhare02 +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.5) +
  labs(
    title = "Average Annual Lowest Daily Temperature (F) by Decade",
    x = "Decade",
    y = "Average Annual Lowest Daily Temperature (F)"
  ) +
  theme_classic()
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
# Normality test -----------------------------------------------
# Normal Q-Q plot with a reference line; points lying close to the line are
# consistent with normally distributed data.
qqnorm(OhareData[["TMIN_AVG"]])
qqline(OhareData[["TMIN_AVG"]])
# Shapiro test --------------------------------------------------
# Ho: Data are normal
# Ha: Data are not normal
# Decision rule: reject Ho when p-value <= 0.05.
# Result: p-value = 0.6833, so fail to reject Ho; no evidence against
# normality.
# NOTE(review): the test is run on TMIN_AVG pooled over both decades; the
# t-test assumption concerns each group, so consider checking per decade.
# `$` is kept here because the call text appears in the printed "data:" line.
shapiro.test(OhareData$TMIN_AVG)
##
## Shapiro-Wilk normality test
##
## data: OhareData$TMIN_AVG
## W = 0.96666, p-value = 0.6833
# Variance assessment -------------------------------------------------
# F test comparing the variance of TMIN_AVG between the two decades.
# Ho: Variances are equal (ratio of variances = 1)
# Ha: Variances are not equal
# Result: p-value = 0.5586, so fail to reject Ho; the variances are treated
# as equal in the t-test.
var.test(
  TMIN_AVG ~ DECADE,
  data = OhareData,
  alternative = "two.sided"
)
##
## F test to compare two variances
##
## data: TMIN_AVG by DECADE
## F = 0.66885, num df = 9, denom df = 9, p-value = 0.5586
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1661325 2.6927802
## sample estimates:
## ratio of variances
## 0.6688485
# One-tailed t-test ----------------------------------------------------
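# Ho: Mean TMIN_AVG in the 1960s is greater than or equal to the mean in the 2010s
# Ha: Mean TMIN_AVG in the 1960s is less than the mean in the 2010s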
# Lower-tailed, pooled-variance t-test: is the 1960s mean below the 2010s
# mean? (Groups are compared in the order 1960s - 2010s.)
# Result: p-value = 0.00008446, so reject the null hypothesis; mean TMIN_AVG
# in the 1960s is significantly lower than in the 2010s.
t.test(
  TMIN_AVG ~ DECADE,
  data = OhareData,
  alternative = "less",
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: TMIN_AVG by DECADE
## t = -4.725, df = 18, p-value = 8.446e-05
## alternative hypothesis: true difference in means between group 1960s and group 2010s is less than 0
## 95 percent confidence interval:
## -Inf -2.260655
## sample estimates:
## mean in group 1960s mean in group 2010s
## 38.77474 42.34604
# Upper-tailed counterpart of the same pooled-variance t-test: is the 1960s
# mean above the 2010s mean?
# Result: p-value = 0.9999, so fail to reject the null hypothesis; no
# evidence that mean TMIN_AVG in the 1960s is greater than in the 2010s.
t.test(
  TMIN_AVG ~ DECADE,
  data = OhareData,
  alternative = "greater",
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: TMIN_AVG by DECADE
## t = -4.725, df = 18, p-value = 0.9999
## alternative hypothesis: true difference in means between group 1960s and group 2010s is greater than 0
## 95 percent confidence interval:
## -4.881947 Inf
## sample estimates:
## mean in group 1960s mean in group 2010s
## 38.77474 42.34604
# Boxplot for TMIN_AVG by Decade
# ggplot2 version of the box plot, with each decade's box filled by DECADE.
boxOhare02 <- ggplot(
  OhareData,
  aes(x = DECADE, y = TMIN_AVG, fill = DECADE)
) +
  geom_boxplot() +
  labs(
    title = "Boxplot for Average Annual Lowest Daily Temperature (F) by Decade",
    x = "Decade",
    y = "Average Annual Lowest Daily Temperature (F)"
  ) +
  theme_classic()
# Evaluating the object at top level draws the figure.
boxOhare02