Course: ENVS 203-001 Environmental Statistics

Data Check

# Libraries ==============================================================
library(DescTools)
library(Stat2Data)
library(ggplot2)
library(RColorBrewer)

# Data Import ============================================================
# Read the homework CSV from the working directory. Character columns are
# converted to factors so DECADE can be used as a grouping variable in the
# formula interfaces below (boxplot, var.test, t.test).
# Use TRUE rather than the shorthand T: T is an ordinary variable that can be
# reassigned, TRUE is a reserved constant.
OhareData <- read.csv("ENVS203_HW08_OhareDataset_24march2026.csv",
                      stringsAsFactors = TRUE)

# Data Check =============================================================
# Per-column summaries: quartiles for numeric columns, level counts for
# factor columns.
summary(OhareData)
##       YEAR        DECADE      TMAX_MAX         TMIN_MAX         TMAX_AVG    
##  Min.   :1960   1960s:10   Min.   : 91.00   Min.   :-23.00   Min.   :56.58  
##  1st Qu.:1965   2010s:10   1st Qu.: 93.75   1st Qu.:-16.25   1st Qu.:57.93  
##  Median :1990              Median : 95.00   Median :-13.00   Median :58.57  
##  Mean   :1990              Mean   : 95.35   Mean   :-11.50   Mean   :58.95  
##  3rd Qu.:2014              3rd Qu.: 96.25   3rd Qu.: -8.00   3rd Qu.:59.81  
##  Max.   :2019              Max.   :103.00   Max.   :  5.00   Max.   :63.92  
##     TMIN_AVG    
##  Min.   :36.16  
##  1st Qu.:38.64  
##  Median :40.61  
##  Mean   :40.56  
##  3rd Qu.:42.63  
##  Max.   :45.13



Two-Tailed t-test for Two Samples

# Two-Tailed t-test for Two Samples==========================================
# Ho: Means are equal
# Ha: Means are not equal

# Base-graphics box plot: TMAX_MAX split by DECADE ----------------------
boxplot(
  TMAX_MAX ~ DECADE,
  data = OhareData,
  xlab = "Decade",
  ylab = "Highest Annual Temperature (F)",
  main = "Boxplot for Highest Annual Temperature (F) by Decade"
)

# Dot plot: visual comparison of spread between decades -----------------
# graphOhare01 holds only the data and the x/y mapping; layers are added
# when the plot is drawn.
graphOhare01 <- ggplot(OhareData, aes(DECADE, TMAX_MAX))

# Dots are binned along the y axis and stacked symmetrically around each
# decade. `binwidth` is not set, so ggplot2 uses its default (1/30 of the
# data range) and prints a message saying so.
graphOhare01 +
  geom_dotplot(dotsize = 0.5, stackdir = "center", binaxis = "y") +
  labs(x = "Decade",
       y = "Highest Annual Temperature (F)",
       title = "Highest Annual Temperature (F) by Decade") +
  theme_classic()
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

# Normality test -----------------------------------------------
# Visual check: normal Q-Q plot of TMAX_MAX with a reference line.
# NOTE(review): this checks the pooled TMAX_MAX column; the two-sample t-test
# assumes normality within each DECADE group -- consider checking each group
# separately.
qqnorm(OhareData$TMAX_MAX)
qqline(OhareData$TMAX_MAX) # data appear normal (visual judgement)

# Shapiro test --------------------------------------------------
# Ho: Data are normally distributed (fail to reject when p > 0.05)
# Ha: Data are not normally distributed

shapiro.test(OhareData$TMAX_MAX) # p-value = 0.2222; fail to reject Ho, no evidence against normality
## 
##  Shapiro-Wilk normality test
## 
## data:  OhareData$TMAX_MAX
## W = 0.93826, p-value = 0.2222
# Variance assessment --------------------
# F test comparing the variance of TMAX_MAX between the two DECADE groups.
# Ho: Variances are equal (ratio of variances = 1)
# Ha: Variances are not equal (ratio of variances != 1)

var.test(TMAX_MAX ~ DECADE, data = OhareData, alternative = "two.sided") # p-value = 0.1182; fail to reject Ho, no evidence the variances differ
## 
##  F test to compare two variances
## 
## data:  TMAX_MAX by DECADE
## F = 0.33422, num df = 9, denom df = 9, p-value = 0.1182
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.08301607 1.34557671
## sample estimates:
## ratio of variances 
##          0.3342222
# Two-tailed t-test -----------------------
# Pooled-variance two-sample t-test (var.equal = TRUE, following the F test
# on the variances).
# Ho: Mean TMAX_MAX is equal in the 1960s and the 2010s
# Ha: Mean TMAX_MAX is not equal in the 1960s and the 2010s

t.test(TMAX_MAX ~ DECADE, data = OhareData,
       var.equal = TRUE, alternative = "two.sided") # p-value = 0.8189; fail to reject Ho, no significant difference between the decade means
## 
##  Two Sample t-test
## 
## data:  TMAX_MAX by DECADE
## t = -0.2323, df = 18, p-value = 0.8189
## alternative hypothesis: true difference in means between group 1960s and group 2010s is not equal to 0
## 95 percent confidence interval:
##  -3.013183  2.413183
## sample estimates:
## mean in group 1960s mean in group 2010s 
##                95.2                95.5
# ggplot2 box plot of TMAX_MAX per decade; boxes are filled by DECADE
boxOhare01 <- ggplot(OhareData, aes(DECADE, TMAX_MAX, fill = DECADE)) +
  geom_boxplot() +
  labs(x = "Decade",
       y = "Highest Annual Temperature (F)",
       title = "Boxplot for Highest Annual Temperature (F) by Decade") +
  theme_classic()

# Auto-print the stored plot object to draw it
boxOhare01



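Interpretation of Results: The mean highest annual temperature (F) did not differ significantly between the 1960s (mean = 95.2) and the 2010s (mean = 95.5) (two-sample t-test, t = -0.23, df = 18, p = 0.82), so we fail to reject the null hypothesis that the means are equal.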

# One-Tailed t-test for Two Samples ==========================================
# Ho: Mean TMIN_AVG in the 1960s is greater than or equal to the mean in the 2010s
# Ha: Mean TMIN_AVG in the 1960s is less than the mean in the 2010s

# Box plot---------------------------------------------------------------
# Base-graphics box plot of TMIN_AVG split by DECADE. The y-axis label
# carries the "(F)" unit so it agrees with the title and the other plots.
boxplot(TMIN_AVG ~ DECADE, data = OhareData,
        main = "Average Annual Lowest Daily Temperature (F) by Decade",
        xlab = "Decade",
        ylab = "Average Annual Lowest Daily Temperature (F)")

# Dot plot variance assessment -----------------------------------------
# graphOhare02 holds only the data and the x/y mapping; layers are added
# when the plot is drawn.
graphOhare02 <- ggplot(data = OhareData, aes(x = DECADE, y = TMIN_AVG))

# Dots are binned along the y axis and stacked symmetrically around each
# decade. `binwidth` is not set, so ggplot2 uses its default (1/30 of the
# data range) and prints a message saying so.
graphOhare02 + theme_classic() +
  geom_dotplot(binaxis = "y", stackdir = "center",
               dotsize = 0.5) +
  labs(title = "Average Annual Lowest Daily Temperature (F) by Decade",
       x = "Decade",
       y = "Average Annual Lowest Daily Temperature (F)")
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

# Normality test -----------------------------------------------
# Visual check: normal Q-Q plot of TMIN_AVG with a reference line.
# NOTE(review): this checks the pooled TMIN_AVG column; the two-sample t-test
# assumes normality within each DECADE group, and the decade means differ
# here -- consider checking each group separately.
qqnorm(OhareData$TMIN_AVG)
qqline(OhareData$TMIN_AVG) # data appear normal (visual judgement)

# Shapiro test --------------------------------------------------
# Ho: Data are normally distributed (fail to reject when p > 0.05)
# Ha: Data are not normally distributed

shapiro.test(OhareData$TMIN_AVG) # p-value = 0.6833; fail to reject Ho, no evidence against normality
## 
##  Shapiro-Wilk normality test
## 
## data:  OhareData$TMIN_AVG
## W = 0.96666, p-value = 0.6833
# Variance assessment -------------------------------------------------
# F test comparing the variance of TMIN_AVG between the two DECADE groups.
# Ho: Variances are equal (ratio of variances = 1)
# Ha: Variances are not equal (ratio of variances != 1)

var.test(TMIN_AVG ~ DECADE, data = OhareData, alternative = "two.sided") # p-value = 0.5586; fail to reject Ho, no evidence the variances differ
## 
##  F test to compare two variances
## 
## data:  TMIN_AVG by DECADE
## F = 0.66885, num df = 9, denom df = 9, p-value = 0.5586
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.1661325 2.6927802
## sample estimates:
## ratio of variances 
##          0.6688485
# One-tailed t-test ----------------------------------------------------
# Pooled-variance two-sample t-test; alternative = "less" tests whether the
# first group (1960s) has a smaller mean than the second (2010s).
# Ho: Mean TMIN_AVG in the 1960s >= mean TMIN_AVG in the 2010s
# Ha: Mean TMIN_AVG in the 1960s < mean TMIN_AVG in the 2010s

t.test(TMIN_AVG ~ DECADE, data = OhareData, var.equal = TRUE,
       alternative = "less") # p-value = 0.00008446; reject Ho, mean TMIN_AVG in the 1960s is significantly lower than in the 2010s
## 
##  Two Sample t-test
## 
## data:  TMIN_AVG by DECADE
## t = -4.725, df = 18, p-value = 8.446e-05
## alternative hypothesis: true difference in means between group 1960s and group 2010s is less than 0
## 95 percent confidence interval:
##       -Inf -2.260655
## sample estimates:
## mean in group 1960s mean in group 2010s 
##            38.77474            42.34604
# Same test in the opposite direction (alternative = "greater").
# Ho: Mean TMIN_AVG in the 1960s <= mean TMIN_AVG in the 2010s
# Ha: Mean TMIN_AVG in the 1960s > mean TMIN_AVG in the 2010s
t.test(TMIN_AVG ~ DECADE, data = OhareData, var.equal = TRUE,
       alternative = "greater") # p-value = 0.9999; fail to reject Ho, no evidence that mean TMIN_AVG in the 1960s is greater than in the 2010s
## 
##  Two Sample t-test
## 
## data:  TMIN_AVG by DECADE
## t = -4.725, df = 18, p-value = 0.9999
## alternative hypothesis: true difference in means between group 1960s and group 2010s is greater than 0
## 95 percent confidence interval:
##  -4.881947       Inf
## sample estimates:
## mean in group 1960s mean in group 2010s 
##            38.77474            42.34604
# ggplot2 box plot of TMIN_AVG per decade; boxes are filled by DECADE
boxOhare02 <- ggplot(OhareData, aes(DECADE, TMIN_AVG, fill = DECADE)) +
  geom_boxplot() +
  labs(x = "Decade",
       y = "Average Annual Lowest Daily Temperature (F)",
       title = "Boxplot for Average Annual Lowest Daily Temperature (F) by Decade") +
  theme_classic()

# Auto-print the stored plot object to draw it
boxOhare02