knitr::opts_chunk$set(echo = TRUE)
library(lubridate)
library(ggplot2)
library(tidyr)
library(dplyr)
library(broom)
A student asked me to help him with an analysis of ocean temperature and salinity data taken at the GAK1 oceanographic research station located at the mouth of Resurrection Bay in Alaska, USA. The data set starts in 1970 and continues to the present. You can read about it here: http://research.cfos.uaf.edu/gak1/
gak1 <- read.csv("http://research.cfos.uaf.edu/gak1/data/TimeSeries/gak1.csv")
gak1 <- gak1[-c(1,2),]
gak1$Depth <- as.factor(gak1$Depth)
gak1$Temp <- as.numeric(gak1$Temp)
gak1$Sal <- as.numeric(gak1$Sal)
gak1$Year <- as.numeric(gak1$Year)
gak1$ConvertedYear <- date_decimal(gak1$Year, tz = "US/Alaska")
gak1$Sigma <- as.numeric(gak1$Sigma)
gak1$Delta.D <- as.numeric(gak1$Delta.D)
str(gak1)
## 'data.frame': 5841 obs. of 9 variables:
## $ Cruise : chr "AC109" "AC109" "AC109" "AC109" ...
## $ St : chr "726" "726" "726" "726" ...
## $ Year : num 1971 1971 1971 1971 1971 ...
## $ Depth : Factor w/ 10 levels "0","10","100",..: 1 2 5 8 9 10 3 4 6 7 ...
## $ Temp : num 4.91 5.01 5.15 5.16 5.17 5.3 5.5 6.23 6.45 5.58 ...
## $ Sal : num 30.6 30.7 30.7 30.7 30.8 ...
## $ Sigma : num 24.2 24.2 24.3 24.3 24.3 ...
## $ Delta.D : num 0 0.0371 0.074 0.1107 0.1838 ...
## $ ConvertedYear: POSIXct, format: "1970-12-15 12:18:38" "1970-12-15 12:18:38" ...
summary(gak1)
## Cruise St Year Depth
## Length:5841 Length:5841 Min. :1971 0 : 599
## Class :character Class :character 1st Qu.:1989 10 : 598
## Mode :character Mode :character Median :2000 20 : 598
## Mean :1998 30 : 596
## 3rd Qu.:2007 50 : 593
## Max. :2021 75 : 587
## (Other):2270
## Temp Sal Sigma Delta.D
## Min. : 1.730 Min. :17.53 Min. :13.02 Min. :0.00000
## 1st Qu.: 5.141 1st Qu.:30.84 1st Qu.:24.09 1st Qu.:0.08356
## Median : 5.979 Median :31.55 Median :24.88 Median :0.23781
## Mean : 6.724 Mean :31.22 Mean :24.45 Mean :0.29290
## 3rd Qu.: 7.693 3rd Qu.:32.22 3rd Qu.:25.39 3rd Qu.:0.47702
## Max. :15.902 Max. :33.60 Max. :26.52 Max. :0.89487
##
## ConvertedYear
## Min. :1970-12-15 12:18:38
## 1st Qu.:1989-05-28 17:49:17
## Median :2000-02-23 12:34:19
## Mean :1998-07-01 15:08:18
## 3rd Qu.:2007-05-10 19:00:17
## Max. :2020-09-01 20:00:09
##
The data set consists of 5841 datapoints, with measurements of temperature (Temp) and salinity (Sal) taken at ten different depths, from the surface down to 250 meters deep.
###Temperature vs Time and Depth
ggplot(gak1, aes(x = ConvertedYear, y = Temp, group = Depth, colour = Depth)) +
theme_bw() +
geom_point() +
geom_smooth(method = "loess", formula = y ~ x)
labs(x = "Year",
y = "Temperature (ºC)",
title = "Ocean temperature over time",
subtitle = "Resurection Bay, Alaska")
## $x
## [1] "Year"
##
## $y
## [1] "Temperature (ºC)"
##
## $title
## [1] "Ocean temperature over time"
##
## $subtitle
## [1] "Resurection Bay, Alaska"
##
## attr(,"class")
## [1] "labels"
The plot reveals that the relationship between tempeature and time is nonlinear, accelerating in the last few years, especially close to the surface. I then fit a cubic ANCOVA model to the data.
temp.lm <- lm(Temp~(Year + I(Year^2) + I(Year^3))*Depth, data = gak1)
summary(temp.lm)
##
## Call:
## lm(formula = Temp ~ (Year + I(Year^2) + I(Year^3)) * Depth, data = gak1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4541 -1.4564 -0.1326 1.0063 8.0369
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.572e+06 3.548e+05 -4.431 9.57e-06 ***
## Year 2.363e+03 5.330e+02 4.433 9.46e-06 ***
## I(Year^2) -1.184e+00 2.669e-01 -4.436 9.35e-06 ***
## I(Year^3) 1.977e-04 4.453e-05 4.438 9.24e-06 ***
## Depth10 2.052e+04 5.018e+05 0.041 0.9674
## Depth100 9.039e+05 5.049e+05 1.790 0.0734 .
## Depth150 8.862e+05 5.057e+05 1.752 0.0797 .
## Depth20 1.039e+05 5.018e+05 0.207 0.8360
## Depth200 9.391e+05 5.082e+05 1.848 0.0647 .
## Depth250 9.817e+05 5.302e+05 1.852 0.0641 .
## Depth30 3.749e+05 5.020e+05 0.747 0.4552
## Depth50 6.585e+05 5.029e+05 1.309 0.1905
## Depth75 8.823e+05 5.044e+05 1.749 0.0803 .
## Year:Depth10 -3.096e+01 7.537e+02 -0.041 0.9672
## Year:Depth100 -1.360e+03 7.584e+02 -1.793 0.0730 .
## Year:Depth150 -1.333e+03 7.596e+02 -1.755 0.0793 .
## Year:Depth20 -1.560e+02 7.537e+02 -0.207 0.8360
## Year:Depth200 -1.413e+03 7.633e+02 -1.851 0.0643 .
## Year:Depth250 -1.476e+03 7.963e+02 -1.854 0.0638 .
## Year:Depth30 -5.634e+02 7.541e+02 -0.747 0.4550
## Year:Depth50 -9.893e+02 7.555e+02 -1.309 0.1904
## Year:Depth75 -1.327e+03 7.576e+02 -1.751 0.0800 .
## I(Year^2):Depth10 1.556e-02 3.774e-01 0.041 0.9671
## I(Year^2):Depth100 6.817e-01 3.797e-01 1.795 0.0726 .
## I(Year^2):Depth150 6.686e-01 3.803e-01 1.758 0.0788 .
## I(Year^2):Depth20 7.811e-02 3.774e-01 0.207 0.8360
## I(Year^2):Depth200 7.082e-01 3.822e-01 1.853 0.0639 .
## I(Year^2):Depth250 7.402e-01 3.987e-01 1.857 0.0634 .
## I(Year^2):Depth30 2.822e-01 3.776e-01 0.748 0.4548
## I(Year^2):Depth50 4.954e-01 3.783e-01 1.310 0.1904
## I(Year^2):Depth75 6.650e-01 3.793e-01 1.753 0.0796 .
## I(Year^3):Depth10 -2.607e-06 6.298e-05 -0.041 0.9670
## I(Year^3):Depth100 -1.139e-04 6.337e-05 -1.798 0.0722 .
## I(Year^3):Depth150 -1.118e-04 6.348e-05 -1.761 0.0783 .
## I(Year^3):Depth20 -1.303e-05 6.298e-05 -0.207 0.8361
## I(Year^3):Depth200 -1.184e-04 6.379e-05 -1.856 0.0636 .
## I(Year^3):Depth250 -1.237e-04 6.653e-05 -1.859 0.0631 .
## I(Year^3):Depth30 -4.713e-05 6.301e-05 -0.748 0.4546
## I(Year^3):Depth50 -8.269e-05 6.313e-05 -1.310 0.1903
## I(Year^3):Depth75 -1.111e-04 6.331e-05 -1.755 0.0793 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.289 on 5801 degrees of freedom
## Multiple R-squared: 0.1565, Adjusted R-squared: 0.1508
## F-statistic: 27.6 on 39 and 5801 DF, p-value: < 2.2e-16
temp.lm <- lm(Temp~Year + I(Year^2) + I(Year^3), data = gak1)
ggplot(gak1, aes(x = Year, y = Temp, group = Depth, colour = Depth)) +
theme_bw() +
geom_point() +
geom_smooth(method = "lm", formula = y ~ poly(x, 3, raw = TRUE)) +
labs(x = "Time",
y = "Temperature (ºC)",
title = "Change in ocean temperature over time",
subtitle = "Resurection Bay, Alaska")
The good news is that there are no signficant differences in the rate of temperature change, although some of the different depths come close and the R^2 for the full model explains significantly more of the variation than the simpler model without depth. The main regression model for ocean temperature at the GAK1 research station is simply the default -1.0092598^{6} + 1516.4649482* Year + -0.7595061* Year^2 + 1.2679514{-4}* Year3 .
ggplot(gak1, aes(x = Year, y = Sal, group = Depth, colour = Depth)) +
theme_bw() +
geom_point() +
geom_smooth(method = "loess", formula = y ~ x) +
labs(x = "Time",
y = "Salinity (psu)",
title = "Change in ocean salinity over time",
subtitle = "Resurection Bay, Alaska")
Similarly, the relationship between salinity is nonlinear. Lower depths appear to have increasingly lower salinity than upper depths over time. I fit an ANCOVA model with a cubic model for the mail regression.
sal.lm <- lm(log(Sal)~(Year + I(Year^2) + I(Year^3))*Depth, data = gak1)
summary(sal.lm)
##
## Call:
## lm(formula = log(Sal) ~ (Year + I(Year^2) + I(Year^3)) * Depth,
## data = gak1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.49131 -0.00943 0.00345 0.01420 0.11309
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.223e+04 5.940e+03 3.742 0.000184 ***
## Year -3.340e+01 8.922e+00 -3.744 0.000183 ***
## I(Year^2) 1.674e-02 4.467e-03 3.746 0.000181 ***
## I(Year^3) -2.795e-06 7.456e-07 -3.749 0.000180 ***
## Depth10 -3.842e+03 8.400e+03 -0.457 0.647448
## Depth100 -2.548e+04 8.452e+03 -3.014 0.002588 **
## Depth150 -2.653e+04 8.466e+03 -3.134 0.001734 **
## Depth20 -1.202e+04 8.400e+03 -1.431 0.152465
## Depth200 -2.607e+04 8.507e+03 -3.065 0.002186 **
## Depth250 -2.645e+04 8.876e+03 -2.980 0.002893 **
## Depth30 -1.832e+04 8.404e+03 -2.180 0.029306 *
## Depth50 -2.402e+04 8.420e+03 -2.853 0.004347 **
## Depth75 -2.573e+04 8.443e+03 -3.048 0.002317 **
## Year:Depth10 5.792e+00 1.262e+01 0.459 0.646256
## Year:Depth100 3.830e+01 1.270e+01 3.017 0.002567 **
## Year:Depth150 3.988e+01 1.272e+01 3.136 0.001720 **
## Year:Depth20 1.808e+01 1.262e+01 1.433 0.152032
## Year:Depth200 3.920e+01 1.278e+01 3.068 0.002168 **
## Year:Depth250 3.976e+01 1.333e+01 2.983 0.002870 **
## Year:Depth30 2.755e+01 1.262e+01 2.182 0.029140 *
## Year:Depth50 3.611e+01 1.265e+01 2.855 0.004315 **
## Year:Depth75 3.869e+01 1.268e+01 3.050 0.002298 **
## I(Year^2):Depth10 -2.910e-03 6.318e-03 -0.461 0.645064
## I(Year^2):Depth100 -1.919e-02 6.357e-03 -3.019 0.002547 **
## I(Year^2):Depth150 -1.998e-02 6.367e-03 -3.139 0.001705 **
## I(Year^2):Depth20 -9.060e-03 6.318e-03 -1.434 0.151600
## I(Year^2):Depth200 -1.964e-02 6.398e-03 -3.070 0.002151 **
## I(Year^2):Depth250 -1.992e-02 6.674e-03 -2.985 0.002847 **
## I(Year^2):Depth30 -1.381e-02 6.321e-03 -2.184 0.028974 *
## I(Year^2):Depth50 -1.810e-02 6.332e-03 -2.858 0.004284 **
## I(Year^2):Depth75 -1.939e-02 6.350e-03 -3.053 0.002279 **
## I(Year^3):Depth10 4.875e-07 1.054e-06 0.462 0.643869
## I(Year^3):Depth100 3.206e-06 1.061e-06 3.021 0.002526 **
## I(Year^3):Depth150 3.338e-06 1.063e-06 3.141 0.001691 **
## I(Year^3):Depth20 1.514e-06 1.054e-06 1.436 0.151166
## I(Year^3):Depth200 3.281e-06 1.068e-06 3.072 0.002133 **
## I(Year^3):Depth250 3.328e-06 1.114e-06 2.988 0.002824 **
## I(Year^3):Depth30 2.307e-06 1.055e-06 2.187 0.028807 *
## I(Year^3):Depth50 3.022e-06 1.057e-06 2.860 0.004252 **
## I(Year^3):Depth75 3.238e-06 1.060e-06 3.055 0.002259 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03833 on 5801 degrees of freedom
## Multiple R-squared: 0.5405, Adjusted R-squared: 0.5374
## F-statistic: 175 on 39 and 5801 DF, p-value: < 2.2e-16
ggplot(gak1, aes(x = Year, y = log(Sal), group = Depth, colour = Depth)) +
theme_bw() +
geom_point() +
geom_smooth(method = "lm", formula = y ~ poly(x, 3, raw = TRUE)) +
labs(x = "Time",
y = "Natural log of salinity (psu)",
title = "Change in ocean salinity over time",
subtitle = "Resurection Bay, Alaska")
Unlike the temperature model, multiple interaction terms are significant, which means the regression lines are different depending on the depth of the sample. Also, unlike the temperature model, the full salinity model has a fairly high R^2 value. The model shows that down to 20 meters, salinity has the same regression model as the surface. It’s only 30 meters and below that show different salinity trends than the surface.
For 0 to 20 meters, the model is the default model: y-hat = 2.2227416^{4} + -33.4036424* Year + 0.0167354* Year^2 + -2.7947854{-6}* Year3 .
For 30 meters and below, the regression model is found by adding the coefficients for the interaction terms to the respective coefficients of the default model. For instance, the model for 250 meters depth is found by adding the Depth250 coefficient to the intercept, the Year:Depth250 coefficient to the Year coefficient, the Year^2:Depth250 coefficient to the Year^2 coefficient, and the Year^3:Depth250 coefficient to the Year^3 coefficient. The end result is this:
y-hat = -4224.1159417 + 6.3587357* Year + -0.003188* Year^2 + 5.3277293{-7}* Year3 .
You can do that to all of the depths or just the ones you’re truly interested in. The same process may be done on the Temperature model above as well.
One warning: The model as fit appears to be heteroskedastic. This would make the standard errors appear smaller than they should be and thereby flag more results as significant. If that is a major concern, then you can check it using the following:
library(sandwich)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
var_cov <- vcovHC(sal.lm, type = "HC4")
coeftest(sal.lm, df = Inf, var_cov)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.2227e+04 1.3593e+04 1.6352 0.10200
## Year -3.3404e+01 2.0158e+01 -1.6571 0.09750 .
## I(Year^2) 1.6735e-02 1.0341e-02 1.6184 0.10557
## I(Year^3) -2.7948e-06 1.6563e-06 -1.6874 0.09153 .
## Depth10 -3.8418e+03 1.6586e+04 -0.2316 0.81682
## Depth100 -2.5476e+04 1.3802e+04 -1.8458 0.06492 .
## Depth150 -2.6530e+04 1.3853e+04 -1.9151 0.05548 .
## Depth20 -1.2022e+04 1.4557e+04 -0.8258 0.40890
## Depth200 -2.6075e+04 1.3965e+04 -1.8672 0.06188 .
## Depth250 -2.6452e+04 1.3367e+04 -1.9788 0.04784 *
## Depth30 -1.8321e+04 1.4493e+04 -1.2641 0.20620
## Depth50 -2.4020e+04 1.4005e+04 -1.7151 0.08632 .
## Depth75 -2.5732e+04 1.4161e+04 -1.8171 0.06920 .
## Year:Depth10 5.7917e+00 2.5172e+01 0.2301 0.81802
## Year:Depth100 3.8298e+01 2.0372e+01 1.8800 0.06011 .
## Year:Depth150 3.9882e+01 2.0952e+01 1.9035 0.05697 .
## Year:Depth20 1.8077e+01 2.2211e+01 0.8139 0.41572
## Year:Depth200 3.9199e+01 2.0109e+01 1.9493 0.05126 .
## Year:Depth250 3.9762e+01 2.0281e+01 1.9606 0.04993 *
## Year:Depth30 2.7548e+01 2.1863e+01 1.2600 0.20767
## Year:Depth50 3.6111e+01 2.1068e+01 1.7141 0.08652 .
## Year:Depth75 3.8685e+01 2.0612e+01 1.8768 0.06054 .
## I(Year^2):Depth10 -2.9103e-03 1.2843e-02 -0.2266 0.82072
## I(Year^2):Depth100 -1.9191e-02 1.0290e-02 -1.8650 0.06218 .
## I(Year^2):Depth150 -1.9985e-02 1.0267e-02 -1.9465 0.05160 .
## I(Year^2):Depth20 -9.0604e-03 1.1037e-02 -0.8209 0.41172
## I(Year^2):Depth200 -1.9643e-02 1.0188e-02 -1.9280 0.05385 .
## I(Year^2):Depth250 -1.9923e-02 1.0151e-02 -1.9627 0.04968 *
## I(Year^2):Depth30 -1.3807e-02 1.0983e-02 -1.2571 0.20872
## I(Year^2):Depth50 -1.8095e-02 1.0171e-02 -1.7792 0.07521 .
## I(Year^2):Depth75 -1.9386e-02 1.0032e-02 -1.9323 0.05332 .
## I(Year^3):Depth10 4.8747e-07 2.1231e-06 0.2296 0.81840
## I(Year^3):Depth100 3.2055e-06 1.7411e-06 1.8411 0.06560 .
## I(Year^3):Depth150 3.3380e-06 1.7394e-06 1.9191 0.05498 .
## I(Year^3):Depth20 1.5137e-06 1.8222e-06 0.8307 0.40614
## I(Year^3):Depth200 3.2809e-06 1.7026e-06 1.9270 0.05398 .
## I(Year^3):Depth250 3.3276e-06 1.6914e-06 1.9673 0.04914 *
## I(Year^3):Depth30 2.3067e-06 1.7894e-06 1.2891 0.19738
## I(Year^3):Depth50 3.0225e-06 1.7361e-06 1.7409 0.08169 .
## I(Year^3):Depth75 3.2381e-06 1.6687e-06 1.9405 0.05232 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The result shows that only the salinity trend at 250 meter depth is truly significantly different from the surface once heteroskedasticity is accounted for.