knitr::opts_chunk$set(echo = TRUE)
library(lubridate)
library(ggplot2)
library(tidyr)
library(dplyr)
library(broom)

Temperature and Salinity vs Depth

A student asked me to help him with an analysis of ocean temperature and salinity data taken at the GAK1 oceanographic research station located at the mouth of Resurrection Bay in Alaska, USA. The data set starts in 1970 and continues to the present. You can read about it here: http://research.cfos.uaf.edu/gak1/

gak1 <- read.csv("http://research.cfos.uaf.edu/gak1/data/TimeSeries/gak1.csv")
gak1 <- gak1[-c(1,2),]
gak1$Depth <- as.factor(gak1$Depth)
gak1$Temp <- as.numeric(gak1$Temp)
gak1$Sal <- as.numeric(gak1$Sal)
gak1$Year <- as.numeric(gak1$Year)
gak1$ConvertedYear <- date_decimal(gak1$Year, tz = "US/Alaska")
gak1$Sigma <- as.numeric(gak1$Sigma)
gak1$Delta.D <- as.numeric(gak1$Delta.D)
str(gak1)
## 'data.frame':    5841 obs. of  9 variables:
##  $ Cruise       : chr  "AC109" "AC109" "AC109" "AC109" ...
##  $ St           : chr  "726" "726" "726" "726" ...
##  $ Year         : num  1971 1971 1971 1971 1971 ...
##  $ Depth        : Factor w/ 10 levels "0","10","100",..: 1 2 5 8 9 10 3 4 6 7 ...
##  $ Temp         : num  4.91 5.01 5.15 5.16 5.17 5.3 5.5 6.23 6.45 5.58 ...
##  $ Sal          : num  30.6 30.7 30.7 30.7 30.8 ...
##  $ Sigma        : num  24.2 24.2 24.3 24.3 24.3 ...
##  $ Delta.D      : num  0 0.0371 0.074 0.1107 0.1838 ...
##  $ ConvertedYear: POSIXct, format: "1970-12-15 12:18:38" "1970-12-15 12:18:38" ...
summary(gak1)
##     Cruise               St                 Year          Depth     
##  Length:5841        Length:5841        Min.   :1971   0      : 599  
##  Class :character   Class :character   1st Qu.:1989   10     : 598  
##  Mode  :character   Mode  :character   Median :2000   20     : 598  
##                                        Mean   :1998   30     : 596  
##                                        3rd Qu.:2007   50     : 593  
##                                        Max.   :2021   75     : 587  
##                                                       (Other):2270  
##       Temp             Sal            Sigma          Delta.D       
##  Min.   : 1.730   Min.   :17.53   Min.   :13.02   Min.   :0.00000  
##  1st Qu.: 5.141   1st Qu.:30.84   1st Qu.:24.09   1st Qu.:0.08356  
##  Median : 5.979   Median :31.55   Median :24.88   Median :0.23781  
##  Mean   : 6.724   Mean   :31.22   Mean   :24.45   Mean   :0.29290  
##  3rd Qu.: 7.693   3rd Qu.:32.22   3rd Qu.:25.39   3rd Qu.:0.47702  
##  Max.   :15.902   Max.   :33.60   Max.   :26.52   Max.   :0.89487  
##                                                                    
##  ConvertedYear                
##  Min.   :1970-12-15 12:18:38  
##  1st Qu.:1989-05-28 17:49:17  
##  Median :2000-02-23 12:34:19  
##  Mean   :1998-07-01 15:08:18  
##  3rd Qu.:2007-05-10 19:00:17  
##  Max.   :2020-09-01 20:00:09  
## 

The data set consists of 5841 datapoints, with measurements of temperature (Temp) and salinity (Sal) taken at ten different depths, from the surface down to 250 meters deep.

###Temperature vs Time and Depth

ggplot(gak1, aes(x = ConvertedYear, y = Temp, group = Depth, colour = Depth)) +
        theme_bw() +
        geom_point() +
        geom_smooth(method = "loess", formula = y ~ x)

        labs(x = "Year",
             y = "Temperature (ºC)",
             title = "Ocean temperature over time",
             subtitle = "Resurection Bay, Alaska")
## $x
## [1] "Year"
## 
## $y
## [1] "Temperature (ºC)"
## 
## $title
## [1] "Ocean temperature over time"
## 
## $subtitle
## [1] "Resurection Bay, Alaska"
## 
## attr(,"class")
## [1] "labels"

The plot reveals that the relationship between tempeature and time is nonlinear, accelerating in the last few years, especially close to the surface. I then fit a cubic ANCOVA model to the data.

temp.lm <- lm(Temp~(Year + I(Year^2) + I(Year^3))*Depth, data = gak1)
summary(temp.lm)
## 
## Call:
## lm(formula = Temp ~ (Year + I(Year^2) + I(Year^3)) * Depth, data = gak1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.4541 -1.4564 -0.1326  1.0063  8.0369 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -1.572e+06  3.548e+05  -4.431 9.57e-06 ***
## Year                2.363e+03  5.330e+02   4.433 9.46e-06 ***
## I(Year^2)          -1.184e+00  2.669e-01  -4.436 9.35e-06 ***
## I(Year^3)           1.977e-04  4.453e-05   4.438 9.24e-06 ***
## Depth10             2.052e+04  5.018e+05   0.041   0.9674    
## Depth100            9.039e+05  5.049e+05   1.790   0.0734 .  
## Depth150            8.862e+05  5.057e+05   1.752   0.0797 .  
## Depth20             1.039e+05  5.018e+05   0.207   0.8360    
## Depth200            9.391e+05  5.082e+05   1.848   0.0647 .  
## Depth250            9.817e+05  5.302e+05   1.852   0.0641 .  
## Depth30             3.749e+05  5.020e+05   0.747   0.4552    
## Depth50             6.585e+05  5.029e+05   1.309   0.1905    
## Depth75             8.823e+05  5.044e+05   1.749   0.0803 .  
## Year:Depth10       -3.096e+01  7.537e+02  -0.041   0.9672    
## Year:Depth100      -1.360e+03  7.584e+02  -1.793   0.0730 .  
## Year:Depth150      -1.333e+03  7.596e+02  -1.755   0.0793 .  
## Year:Depth20       -1.560e+02  7.537e+02  -0.207   0.8360    
## Year:Depth200      -1.413e+03  7.633e+02  -1.851   0.0643 .  
## Year:Depth250      -1.476e+03  7.963e+02  -1.854   0.0638 .  
## Year:Depth30       -5.634e+02  7.541e+02  -0.747   0.4550    
## Year:Depth50       -9.893e+02  7.555e+02  -1.309   0.1904    
## Year:Depth75       -1.327e+03  7.576e+02  -1.751   0.0800 .  
## I(Year^2):Depth10   1.556e-02  3.774e-01   0.041   0.9671    
## I(Year^2):Depth100  6.817e-01  3.797e-01   1.795   0.0726 .  
## I(Year^2):Depth150  6.686e-01  3.803e-01   1.758   0.0788 .  
## I(Year^2):Depth20   7.811e-02  3.774e-01   0.207   0.8360    
## I(Year^2):Depth200  7.082e-01  3.822e-01   1.853   0.0639 .  
## I(Year^2):Depth250  7.402e-01  3.987e-01   1.857   0.0634 .  
## I(Year^2):Depth30   2.822e-01  3.776e-01   0.748   0.4548    
## I(Year^2):Depth50   4.954e-01  3.783e-01   1.310   0.1904    
## I(Year^2):Depth75   6.650e-01  3.793e-01   1.753   0.0796 .  
## I(Year^3):Depth10  -2.607e-06  6.298e-05  -0.041   0.9670    
## I(Year^3):Depth100 -1.139e-04  6.337e-05  -1.798   0.0722 .  
## I(Year^3):Depth150 -1.118e-04  6.348e-05  -1.761   0.0783 .  
## I(Year^3):Depth20  -1.303e-05  6.298e-05  -0.207   0.8361    
## I(Year^3):Depth200 -1.184e-04  6.379e-05  -1.856   0.0636 .  
## I(Year^3):Depth250 -1.237e-04  6.653e-05  -1.859   0.0631 .  
## I(Year^3):Depth30  -4.713e-05  6.301e-05  -0.748   0.4546    
## I(Year^3):Depth50  -8.269e-05  6.313e-05  -1.310   0.1903    
## I(Year^3):Depth75  -1.111e-04  6.331e-05  -1.755   0.0793 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.289 on 5801 degrees of freedom
## Multiple R-squared:  0.1565, Adjusted R-squared:  0.1508 
## F-statistic:  27.6 on 39 and 5801 DF,  p-value: < 2.2e-16
temp.lm <- lm(Temp~Year + I(Year^2) + I(Year^3), data = gak1)
ggplot(gak1, aes(x = Year, y = Temp, group = Depth, colour = Depth)) +
        theme_bw() +
        geom_point() +
        geom_smooth(method = "lm", formula = y ~ poly(x, 3, raw = TRUE)) +
        labs(x = "Time",
             y = "Temperature (ºC)",
             title = "Change in ocean temperature over time",
             subtitle = "Resurection Bay, Alaska")

The good news is that there are no signficant differences in the rate of temperature change, although some of the different depths come close and the R^2 for the full model explains significantly more of the variation than the simpler model without depth. The main regression model for ocean temperature at the GAK1 research station is simply the default -1.0092598^{6} + 1516.4649482* Year + -0.7595061* Year^2 + 1.2679514{-4}* Year3 .

Salinity vs Time and Depth

ggplot(gak1, aes(x = Year, y = Sal, group = Depth, colour = Depth)) +
        theme_bw() +
        geom_point() +
        geom_smooth(method = "loess", formula = y ~ x) +
        labs(x = "Time",
             y = "Salinity (psu)",
             title = "Change in ocean salinity over time",
             subtitle = "Resurection Bay, Alaska")

Similarly, the relationship between salinity is nonlinear. Lower depths appear to have increasingly lower salinity than upper depths over time. I fit an ANCOVA model with a cubic model for the mail regression.

sal.lm <- lm(log(Sal)~(Year + I(Year^2) + I(Year^3))*Depth, data = gak1)
summary(sal.lm)
## 
## Call:
## lm(formula = log(Sal) ~ (Year + I(Year^2) + I(Year^3)) * Depth, 
##     data = gak1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.49131 -0.00943  0.00345  0.01420  0.11309 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.223e+04  5.940e+03   3.742 0.000184 ***
## Year               -3.340e+01  8.922e+00  -3.744 0.000183 ***
## I(Year^2)           1.674e-02  4.467e-03   3.746 0.000181 ***
## I(Year^3)          -2.795e-06  7.456e-07  -3.749 0.000180 ***
## Depth10            -3.842e+03  8.400e+03  -0.457 0.647448    
## Depth100           -2.548e+04  8.452e+03  -3.014 0.002588 ** 
## Depth150           -2.653e+04  8.466e+03  -3.134 0.001734 ** 
## Depth20            -1.202e+04  8.400e+03  -1.431 0.152465    
## Depth200           -2.607e+04  8.507e+03  -3.065 0.002186 ** 
## Depth250           -2.645e+04  8.876e+03  -2.980 0.002893 ** 
## Depth30            -1.832e+04  8.404e+03  -2.180 0.029306 *  
## Depth50            -2.402e+04  8.420e+03  -2.853 0.004347 ** 
## Depth75            -2.573e+04  8.443e+03  -3.048 0.002317 ** 
## Year:Depth10        5.792e+00  1.262e+01   0.459 0.646256    
## Year:Depth100       3.830e+01  1.270e+01   3.017 0.002567 ** 
## Year:Depth150       3.988e+01  1.272e+01   3.136 0.001720 ** 
## Year:Depth20        1.808e+01  1.262e+01   1.433 0.152032    
## Year:Depth200       3.920e+01  1.278e+01   3.068 0.002168 ** 
## Year:Depth250       3.976e+01  1.333e+01   2.983 0.002870 ** 
## Year:Depth30        2.755e+01  1.262e+01   2.182 0.029140 *  
## Year:Depth50        3.611e+01  1.265e+01   2.855 0.004315 ** 
## Year:Depth75        3.869e+01  1.268e+01   3.050 0.002298 ** 
## I(Year^2):Depth10  -2.910e-03  6.318e-03  -0.461 0.645064    
## I(Year^2):Depth100 -1.919e-02  6.357e-03  -3.019 0.002547 ** 
## I(Year^2):Depth150 -1.998e-02  6.367e-03  -3.139 0.001705 ** 
## I(Year^2):Depth20  -9.060e-03  6.318e-03  -1.434 0.151600    
## I(Year^2):Depth200 -1.964e-02  6.398e-03  -3.070 0.002151 ** 
## I(Year^2):Depth250 -1.992e-02  6.674e-03  -2.985 0.002847 ** 
## I(Year^2):Depth30  -1.381e-02  6.321e-03  -2.184 0.028974 *  
## I(Year^2):Depth50  -1.810e-02  6.332e-03  -2.858 0.004284 ** 
## I(Year^2):Depth75  -1.939e-02  6.350e-03  -3.053 0.002279 ** 
## I(Year^3):Depth10   4.875e-07  1.054e-06   0.462 0.643869    
## I(Year^3):Depth100  3.206e-06  1.061e-06   3.021 0.002526 ** 
## I(Year^3):Depth150  3.338e-06  1.063e-06   3.141 0.001691 ** 
## I(Year^3):Depth20   1.514e-06  1.054e-06   1.436 0.151166    
## I(Year^3):Depth200  3.281e-06  1.068e-06   3.072 0.002133 ** 
## I(Year^3):Depth250  3.328e-06  1.114e-06   2.988 0.002824 ** 
## I(Year^3):Depth30   2.307e-06  1.055e-06   2.187 0.028807 *  
## I(Year^3):Depth50   3.022e-06  1.057e-06   2.860 0.004252 ** 
## I(Year^3):Depth75   3.238e-06  1.060e-06   3.055 0.002259 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03833 on 5801 degrees of freedom
## Multiple R-squared:  0.5405, Adjusted R-squared:  0.5374 
## F-statistic:   175 on 39 and 5801 DF,  p-value: < 2.2e-16
ggplot(gak1, aes(x = Year, y = log(Sal), group = Depth, colour = Depth)) +
        theme_bw() +
        geom_point() +
        geom_smooth(method = "lm", formula = y ~ poly(x, 3, raw = TRUE)) +
        labs(x = "Time",
             y = "Natural log of salinity (psu)",
             title = "Change in ocean salinity over time",
             subtitle = "Resurection Bay, Alaska")

Unlike the temperature model, multiple interaction terms are significant, which means the regression lines are different depending on the depth of the sample. Also, unlike the temperature model, the full salinity model has a fairly high R^2 value. The model shows that down to 20 meters, salinity has the same regression model as the surface. It’s only 30 meters and below that show different salinity trends than the surface.

For 0 to 20 meters, the model is the default model: y-hat = 2.2227416^{4} + -33.4036424* Year + 0.0167354* Year^2 + -2.7947854{-6}* Year3 .

For 30 meters and below, the regression model is found by adding the coefficients for the interaction terms to the respective coefficients of the default model. For instance, the model for 250 meters depth is found by adding the Depth250 coefficient to the intercept, the Year:Depth250 coefficient to the Year coefficient, the Year^2:Depth250 coefficient to the Year^2 coefficient, and the Year^3:Depth250 coefficient to the Year^3 coefficient. The end result is this:

y-hat = -4224.1159417 + 6.3587357* Year + -0.003188* Year^2 + 5.3277293{-7}* Year3 .

You can do that to all of the depths or just the ones you’re truly interested in. The same process may be done on the Temperature model above as well.

One warning: The model as fit appears to be heteroskedastic. This would make the standard errors appear smaller than they should be and thereby flag more results as significant. If that is a major concern, then you can check it using the following:

library(sandwich)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
var_cov <- vcovHC(sal.lm, type = "HC4")
coeftest(sal.lm, df = Inf, var_cov)
## 
## z test of coefficients:
## 
##                       Estimate  Std. Error z value Pr(>|z|)  
## (Intercept)         2.2227e+04  1.3593e+04  1.6352  0.10200  
## Year               -3.3404e+01  2.0158e+01 -1.6571  0.09750 .
## I(Year^2)           1.6735e-02  1.0341e-02  1.6184  0.10557  
## I(Year^3)          -2.7948e-06  1.6563e-06 -1.6874  0.09153 .
## Depth10            -3.8418e+03  1.6586e+04 -0.2316  0.81682  
## Depth100           -2.5476e+04  1.3802e+04 -1.8458  0.06492 .
## Depth150           -2.6530e+04  1.3853e+04 -1.9151  0.05548 .
## Depth20            -1.2022e+04  1.4557e+04 -0.8258  0.40890  
## Depth200           -2.6075e+04  1.3965e+04 -1.8672  0.06188 .
## Depth250           -2.6452e+04  1.3367e+04 -1.9788  0.04784 *
## Depth30            -1.8321e+04  1.4493e+04 -1.2641  0.20620  
## Depth50            -2.4020e+04  1.4005e+04 -1.7151  0.08632 .
## Depth75            -2.5732e+04  1.4161e+04 -1.8171  0.06920 .
## Year:Depth10        5.7917e+00  2.5172e+01  0.2301  0.81802  
## Year:Depth100       3.8298e+01  2.0372e+01  1.8800  0.06011 .
## Year:Depth150       3.9882e+01  2.0952e+01  1.9035  0.05697 .
## Year:Depth20        1.8077e+01  2.2211e+01  0.8139  0.41572  
## Year:Depth200       3.9199e+01  2.0109e+01  1.9493  0.05126 .
## Year:Depth250       3.9762e+01  2.0281e+01  1.9606  0.04993 *
## Year:Depth30        2.7548e+01  2.1863e+01  1.2600  0.20767  
## Year:Depth50        3.6111e+01  2.1068e+01  1.7141  0.08652 .
## Year:Depth75        3.8685e+01  2.0612e+01  1.8768  0.06054 .
## I(Year^2):Depth10  -2.9103e-03  1.2843e-02 -0.2266  0.82072  
## I(Year^2):Depth100 -1.9191e-02  1.0290e-02 -1.8650  0.06218 .
## I(Year^2):Depth150 -1.9985e-02  1.0267e-02 -1.9465  0.05160 .
## I(Year^2):Depth20  -9.0604e-03  1.1037e-02 -0.8209  0.41172  
## I(Year^2):Depth200 -1.9643e-02  1.0188e-02 -1.9280  0.05385 .
## I(Year^2):Depth250 -1.9923e-02  1.0151e-02 -1.9627  0.04968 *
## I(Year^2):Depth30  -1.3807e-02  1.0983e-02 -1.2571  0.20872  
## I(Year^2):Depth50  -1.8095e-02  1.0171e-02 -1.7792  0.07521 .
## I(Year^2):Depth75  -1.9386e-02  1.0032e-02 -1.9323  0.05332 .
## I(Year^3):Depth10   4.8747e-07  2.1231e-06  0.2296  0.81840  
## I(Year^3):Depth100  3.2055e-06  1.7411e-06  1.8411  0.06560 .
## I(Year^3):Depth150  3.3380e-06  1.7394e-06  1.9191  0.05498 .
## I(Year^3):Depth20   1.5137e-06  1.8222e-06  0.8307  0.40614  
## I(Year^3):Depth200  3.2809e-06  1.7026e-06  1.9270  0.05398 .
## I(Year^3):Depth250  3.3276e-06  1.6914e-06  1.9673  0.04914 *
## I(Year^3):Depth30   2.3067e-06  1.7894e-06  1.2891  0.19738  
## I(Year^3):Depth50   3.0225e-06  1.7361e-06  1.7409  0.08169 .
## I(Year^3):Depth75   3.2381e-06  1.6687e-06  1.9405  0.05232 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The result shows that only the salinity trend at 250 meter depth is truly significantly different from the surface once heteroskedasticity is accounted for.