Determining the effects of lake variables on the relationship of TP (log transformed ug/L) with Chl-a concentrations (log transformed ug/L), using the gradient of residuals against variables.

Linear model of logCHLA~logTP

Initial log-transformed linear model

Scatterplot

#linear model of chlorophyll to phosphorus
lm1 <- lm(logCHLA~logTP, data=df)
#creating residuals
df$res.lm <- resid(lm1)
plot.lm1 <- ggplot(df,aes(x=logTP,y=logCHLA))+
  geom_point()+
  geom_smooth(method='lm')+
  labs(title="Linear log-log model",x="log TP",y="log CHLA")
print(plot.lm1)
## `geom_smooth()` using formula 'y ~ x'

GAM

## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## logCHLA ~ s(logTP)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.52699    0.01145   133.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##            edf Ref.df   F p-value    
## s(logTP) 7.636  8.521 551  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.421   Deviance explained = 42.2%
## GCV = 0.8489  Scale est. = 0.84777   n = 6463

characteristicinterceptslopep.valuersquared
SAMPLE_DATE0.850289148463432-6.89826635426592e-055.46234608554396e-1192.00279661146439e-130
DEPTH, BOTTOM_OW_NA0.183054485000925-0.0112875218818854.5378047443628e-124.70045350964983e-08
NITROGEN, NITRATE-NITRITE_OW_T-0.00356774000397907-0.3042399350618020.7989869540820282.98343585229416e-05
SPECIFIC CONDUCTANCE_OW_NA0.0238878360602011-0.0001512136182866020.3227449695553560.22335561977934

Model statistics

summary(lm1)
## 
## Call:
## lm(formula = logCHLA ~ logTP, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.6639 -0.4642  0.0532  0.5741  4.5213 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.92671    0.06888   86.04   <2e-16 ***
## logTP        1.03479    0.01596   64.82   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9419 on 6461 degrees of freedom
## Multiple R-squared:  0.3941, Adjusted R-squared:  0.394 
## F-statistic:  4202 on 1 and 6461 DF,  p-value: < 2.2e-16
anova(lm1)
DfSum SqMean SqF valuePr(>F)
13.73e+033.73e+034.2e+030
64615.73e+030.887         

Residual plots

Remove outliers and rerun model

Identify outliers

Outliers were identified as points with studentized residuals outside 3:-3

Scatterplot without outliers

print(plot.lm2<- ggplot(df2,aes(x=logTP,y=logCHLA,alpha=0.5))+
        geom_point()+
        geom_abline(intercept=lm2$coefficients[1],slope=lm2$coefficients[2],size=1.1)+
        theme_bw()+
        theme(legend.position = "none")+
        ylim(-2.5,6)+
        xlim(-7.5,-2))
## Warning: Removed 23 rows containing missing values (geom_point).

Model statistics

summary(lm2)
## 
## Call:
## lm(formula = logCHLA ~ logTP, data = df2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9893 -0.4695  0.0344  0.5298  2.8209 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.31345    0.06215  101.58   <2e-16 ***
## logTP        1.11567    0.01440   77.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8247 on 6356 degrees of freedom
## Multiple R-squared:  0.4855, Adjusted R-squared:  0.4855 
## F-statistic:  5999 on 1 and 6356 DF,  p-value: < 2.2e-16
anova(lm2)
DfSum SqMean SqF valuePr(>F)
14.08e+034.08e+036e+030
63564.32e+030.68    

Residual plots

Change in trend

Red is overall trend, green is first year of sampling, blue is most recent year of sampling.

lm.first
## 
## Call:
## lm(formula = logCHLA ~ logTP, data = df3[df3$`sampling event` == 
##     "first", ])
## 
## Coefficients:
## (Intercept)        logTP  
##       6.095        1.036
lm.last
## 
## Call:
## lm(formula = logCHLA ~ logTP, data = df3[df3$`sampling event` == 
##     "last", ])
## 
## Coefficients:
## (Intercept)        logTP  
##       6.081        1.086

Variable effects on model residuals

Relationship of model residuals to lake characteristic variables

Residual plots

Pattern of logCHLA~logTP model residuals along gradients of variables. All variables are fitted to a linear model on the residuals.

# only keep relevant variables
df3 <- df2 %>%
  mutate("Log(N:P)"=log(NPratio)) %>% 
  select(SAMPLE_DATE,
         `DEPTH, SECCHI DISK DEPTH_SD_NA`,
         `Log(N:P)`,
         `TEMPERATURE, WATER_OW_NA`,
         `TRUE COLOR_OW_T`,
         res.lm) %>% 
  rename("Sample date"=SAMPLE_DATE,
         "Secchi disk depth"=`DEPTH, SECCHI DISK DEPTH_SD_NA`,
         "Lake temperature"=`TEMPERATURE, WATER_OW_NA`,
         "True color"=`TRUE COLOR_OW_T`)

#loop
for(i in (2:ncol(df3)-1)){
  lm.df <- df3[is.finite(df3[,i]),] #removing NAs from relevant column
  lm.loop <- lm(res.lm ~ lm.df[,i], data=lm.df) #linear model of residuals against variable
  p1 <- ggplot(df3,aes(x=df3[,i],y=res.lm))+ #scatterplot
    geom_point()+
    geom_smooth(method='lm')+ 
    labs(x=paste(colnames(df3[i])),
         y='residuals of LM(logCHLA~logTP)')
  if(colnames(df3[i])=="Sample date"){
    p1 <- p1+scale_x_date(date_labels="%Y")
  }
  p2 <- ggplot(data=df3, aes(x=df3[,i])) + #density plot
    geom_density(fill="grey") +
    theme(axis.title.x=element_blank(),
          axis.text.x=element_blank(),
          axis.ticks.x=element_blank(),
          axis.title.y=element_blank(),
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank(),
          panel.grid=element_blank(),
          axis.line=element_blank(),
          panel.background = element_blank())
  require(gridExtra)
  grid.arrange(p2, p1, ncol = 1, heights = c(1, 4)) #output plots
  stats[nrow(stats)+1,] <- c(colnames(df3[i]), #coefficients into stats table
                             coef(lm.loop),
                             summary(lm.loop)$coefficients[2,4],
                             summary(lm.loop)$r.squared)
  vars[nrow(vars)+1,] <- c(colnames(df3)[i], #coefficients into variables table
                           paste(min(as.numeric(na.omit(df3[,i]))),
                                 "-",max(as.numeric(na.omit(df3[,i])))),
                           nrow(df3[i]),
                           paste(median(as.numeric(na.omit(df3[,i]))),
                                 "(",summary(as.numeric(na.omit(df3[,i])))[2],
                                 "-",summary(as.numeric(na.omit(df3[,i])))[5],")")) #stats
}

Model statistics

Variable descriptive statistics

characteristicrangenmedian..IQR.
Sample date6007 - 18535635812684.5 ( 8230 - 15878.5 )
Secchi disk depth0.05 - 13.663583.3 ( 2 - 4.75 )
Log(N:P)-Inf - 5.6466240608162563580.0143887374520995 ( -0.671485683778766 - 1.0929824056555 )
Lake temperature4 - 36635822.7777777777778 ( 20 - 25 )
True color0 - 222635810 ( 6 - 19 )
N WET DEPOSITION2.30022078752517 - 10.300975799560529235.33139736132514 ( 4.55434440887906 - 6.44047206640243 )
PRECIPITATION76.6439971923828 - 192.6909942626952515105.960250854492 ( 96.1419982910156 - 119.623184926382 )
COLOR (LOW)0 - 2050178 ( 5 - 13 )
COLOR (HIGH)3.04452243772342 - 5.4026773818722814443.40119738166216 ( 3.2188758248682 - 3.73766961828337 )

Coefficients for linear models of residuals along gradient of variables. Bold values indicate significant p-values (<0.05). Sorted by rsquared.

characteristicinterceptslopep.valuersquared
Secchi disk depth0.541803029246122-0.1533404607630372.27804470391924e-1730.119984980885286
Sample date0.781027159754332-6.34600046156702e-054.68974655730493e-1360.0924225582939648
N WET DEPOSITION-0.7552155448816010.136708859049783.12451405622739e-410.0600533806507492
Lake temperature0.356056733585495-0.0155327817582232.46192107503823e-070.00439476951584657
COLOR (LOW)-0.04265564414104830.006390003014437810.003872799446480670.00169712544101299
PRECIPITATION0.242827192111365-0.00110324550418430.2082259972167430.000630136773462971
Log(N:P)-0.013505519227058-0.01329336190118530.0974710010815960.000542896327019578
COLOR (HIGH)-0.224870522583550.05077953643177340.4324357833817940.000460314324769141
True color0.0060336628838756-0.0003126659698118180.6550971973577873.19121762271381e-05

Most recent year

## Error in `$<-.data.frame`(`*tmp*`, "DEPTH, BOTTOM_OW_NA", value = NA_real_): replacement has 1 row, data has 0
## Error in summary(lm.dep)$coefficients[2, 4]: subscript out of bounds

Variable descriptive statistics

characteristicyearsrangenmedian..IQR.
DEPTH, SECCHI DISK DEPTH_SD_NA2018 - 20200.65 - 11.852753.9 ( 2.3 - 5.25 )
NITROGEN, NITRATE-NITRITE_OW_T2018 - 20200.0035 - 0.6772750.01925 ( 0.01 - 0.06655 )
NPratio2018 - 20200.0737463126843658 - 115.5290102389082751.41768029054015 ( 0.685363247863248 - 4.68072493224932 )
TEMPERATURE, WATER_OW_NA2018 - 202015 - 30.527525 ( 22.375 - 26.75 )
TRUE COLOR_OW_T2018 - 20201 - 852757 ( 6 - 13.25 )
LAKE DEPTH2020 - 202011 - 119
ACID DEPOSITION (DIN:TP>1.5)2011 - 20182.30022078752517 - 6.6883175373077382
ACID DEPOISITION (DIN:TP<1.5)2017 - 20182.30022078752517 - 7.62836072437158176
PRECIPITATION2013 - 201676.9628023420061 - 131.316277313232126
NITRATE+NITRITE (DIN:TP<1.5)2017 - 20200.0035 - 0.118140
NITRATE+NITRITE (DIN:TP>1.5)2011 - 20200.01 - 0.67772

Coefficients for linear models of residuals along gradient of variables in most recent year. Bold values indicate significant p-values (<0.05). Sorted by rsquared.

characteristicinterceptslopep.valuersquared
DEPTH, SECCHI DISK DEPTH_SD_NA0.428630517179621-0.1359869622413641.60201682818058e-100.14227166740512
NITROGEN, NITRATE-NITRITE_OW_T0.0016712605690871-1.280160379570150.02908914042057540.054174610820437
NITRATE+NITRITE (DIN:TP>1.5)-0.179891730221681-0.9346344939158560.1222620669528410.0347590611084569
TRUE COLOR_OW_T-0.1939097440813530.007470509452445080.03426468002147770.0164911846704551
NPratio-0.0662176579841025-0.005897299267937490.2566522960758050.0149363811196209
PRECIPITATION0.362357170866486-0.007598182092530730.3270316532561950.0080063144947164
ACID DEPOSITION (DIN:TP>1.5)-0.6552900948193470.07311079360301770.5180442009407210.00551816085938825
ACID DEPOSITION (DIN:TP<1.5)-0.6552900948193470.07311079360301770.5180442009407210.00551816085938825
NITRATE+NITRITE (DIN:TP<1.5)-0.06631242025098330.6836169569891210.8506168351205420.000261725815197304
TEMPERATURE, WATER_OW_NA-0.0512983597476334-0.002158941140775260.8820010251958158.17492733348602e-05

Color bins

High vs low color, sample date [1] “HIGH COLOR, N= 1444” [1] “LOW COLOR, N= 5017”

characteristicinterceptslopep.valuersquared
High color0.977960932533248-7.60055366367557e-052.22703116802528e-220.0682986263223938
Low color0.767216135836936-6.27160009326501e-052.2179012103683e-1190.104058314969691

Color trend, sample date [1] “INCREASING COLOR, N= 4250” [1] “DECREASING COLOR, N= 2108”

characteristicinterceptslopep.valuersquared
Increasing color0.863341279568197-7.26260704715752e-056.22896911385686e-1160.116023927247504
Decreasing color0.658066306787453-4.88466089095998e-058.81348882000011e-300.0591965327510898

Relationship of model residuals to year by lake

Pattern of logCHLA~logTP model residuals along sample date for each lake. Data from each lake are fitted to a linear model on the residuals.

Residual plots

A red regression line indicates the model is statistically insignificant (p>0.05). A red border medians that the lake had a negative chlorophyll trend and a flat or positive phosphorous trend.

#each lake
#loop over each lake
LAKE_IDS <- unique(df$LAKE_ID) #list lakes
coeffs <- data.frame(lake=character(),
                     intercept=double(),
                     slope=double(),
                     'p-value'=double(),
                     rsquared=double()) #blank coefficient table

incongruous <- read.csv("~/OneDrive - New York State Office of Information Technology Services/Rscripts/Trend/chl-tp-lakes.csv")
incongruous <- incongruous[which(incongruous$CHL_slope!=incongruous$PHOS_slope),]
incongruous <- incongruous[incongruous$CHL_slope=="neg",]

for (i in 1:length(LAKE_IDS)) {
  lake <- LAKE_IDS[[i]] #pick a lake
  dat <- df2[df2$LAKE_ID == lake,] #create new data with that lake
  resid.loop <- lm(logCHLA~logTP,data=dat) #linear model of chl vs tp
  dat$res.loop <- resid(resid.loop) #residuals for that lake
  lm.loop <- lm(res.loop~SAMPLE_DATE,data=dat) #linear model of residuals vs year
  co <- coef(lm.loop)
  plot <- ggplot(dat,aes(x=SAMPLE_DATE,y=res.loop))+ #plot lake
    geom_point() +
    labs(title=paste(lake),x='year',y='residuals of LM(logCHLA~logTP)')
  if(summary(lm.loop)$coefficients[,4]<0.05){ #plot significant lakes
    plot <- plot + geom_smooth(method='lm')
  }else{
    plot <- plot + geom_smooth(method='lm',aes(color="red"),show.legend=FALSE) #plot insignificant lakes
  }
  if(lake %in% incongruous$LAKE_ID){
    plot <- plot + theme(panel.border = element_rect(colour = "red", fill=NA, size=3)) #color incongruous lakes
  }
  print(plot) #print
  coeffs[nrow(coeffs)+1,] <- c(lake,
                               co,
                               summary(lm.loop)$coefficients[2,4],
                               summary(lm.loop)$r.squared)
}

Model statistics

Coefficients for linear models of residuals along gradient of time for each lake. Bold values indicate significant p-values (<0.05). Asterisks (***) indicate that the lake had a negative chlorophyll trend and a flat or positive phosphorous trend.

lakeinterceptslopep.valuersquared
0202CHA01220.944152825865916-7.53079675136563e-059.56194504310585e-120.173548887674343
0202FIN01530.635749577648235-5.9507857767644e-050.0001745702049316180.0712769366388969
0302SOD00960.48666769255914-3.82719485604449e-050.01064145391406540.0540104682370989
0403SIL01150.502854823465325-4.10077102364865e-050.001832053827526770.0576617991573737
0602BRA01541.06149274566162-8.763415619743e-051.9026283344591e-110.255844767247595
0602CRO0073-0.02796840063245712.34944278403729e-060.8461006842089560.000251961954288501
0602EAR01460.851959664465121-6.94640784852235e-054.50387751003872e-060.124292588234536
0602EAT01631.3623772971199-0.0001121181180475241.6356436919286e-150.344014792791017
0602HAT01550.593254365900425-4.89064089889259e-052.94395645687316e-050.106817806226883
0602LEB01531.11902446562697-9.78474627625745e-051.5274614012525e-110.271692914186169
0602MEL00390.63727113476422-5.08639374049919e-056.48105278768945e-060.0866190932257855
0602MOR01520.891944581461988-7.51301268569577e-052.05080228334012e-080.113300646478107
0602PET00780.331789456726402-2.531689333677e-050.03690766211523440.0234515777766317
0703CAZ01530.449212205317357-3.63154232384546e-050.002837508288509360.0539950072335631
0703DER0139A1.53095378510904-0.0001257336562497147.94950449808548e-270.373941960810729
0703TUS0153A0.949073117601567-8.1672021690225e-052.25647663987518e-100.148365549026748
0705COM03330.673891148746326-5.21493736896624e-050.0003186333594933140.064448084168005
0705DUC02221.14655293221319-9.03839962163188e-058.00695783808612e-070.17024507008015
0801SEC0782B0.129851054598145-1.10468600256644e-050.2464082075649070.00725564585045497
0906BLA00011.10590132446955-9.06208131345451e-056.25783448358663e-080.127600869619626
0906BON00240.114791426411991-8.89386232928277e-060.2453897593322910.0079291818651228
0906BUT00540.796388583921065-6.64826965822309e-051.57274435460662e-060.0944090894268697
1005GLE04410.367866424215461-2.90789951287784e-050.01361802827230280.0361108061211256
1102BAB11090.730036058917398-5.88644234673037e-050.0001889775104942150.0604552801273828
1104GOO0672A0.607548239418278-5.2413319085535e-050.001324578267038120.0722238220775747
1104SAC03140.58979347330651-5.07916555060258e-052.63834051446714e-050.127488749571478
1104SCH03741.52501684883298-0.0001162133810052245.64158238577791e-100.202904429881148
1302CAR0062A0.435295439195984-3.9720317605433e-050.004321720767709940.0962057148638054
1302WAC01170.0706969591230926-5.66313319549475e-060.6023488490061150.00151966569190011
1308ROB09020.728387033604061-5.34849245969702e-050.004326189052573730.0648110381252875
1310QUE00570.508237857356091-3.99890117026577e-050.004478729614097160.0398814593146024
1401ANA02510.467390753646235-3.66754111085952e-050.003875740123622690.0386700308115073
1402WOL00371.12013529268377-9.92151035598942e-051.83213731872521e-080.231067618390379
1404OQU03831.09146002550101-8.28530634906585e-057.14167052707757e-080.150071736337309
1501LUC0982B-0.2171361363638661.76125478750811e-050.3800005533774810.00714356847465035
1701LFR06621.42885257134606-0.0001053740542641063.28397374780158e-060.119867716701038

Distribution of slopes

Dot plot of slope for each lake. Grey dots are statistically insignificant (p>0.05).

Maps