These are the codes I used to run the multiple and simple linear regression analysis on the correlations between axis 1/2 scores and water chemistry measurements from B1, B5, B7, B9, and B9A, which correspond to five water sampling stations designated by the Baltimore County.
I am going to use data from data.scores.dplyr (Check my NMDS Combined Density) and from wc (Check my PCA Water Chemistry). I will use dplyr package to combine and filter data to fit purpose.
library(readxl)
wc <- read_excel("F:/Thesis stuff/Data Analysis/Part a (87-09)/PCA (Cl, Ca, SC)/Data for PCA.xlsx")
data.scores.dplyr <- read_excel("C:/Users/tiena/OneDrive/Documents/thesis-codes/Part a (87-09)/Multiple regression between WC and axis scores/NMDS scores-dplyr.xlsx")
library(dplyr)
wc_dplyr <- wc %>% filter(!row_number() %in% c(6,12)) %>% group_by(Year) %>% arrange(match(Site,c("RR1","RR2","RR5","RR4","RR3")))
#delete row 6 and 12 that contains data from RR6, which does not match with any benthic site
#rearrange rows so that water quality sites will match benthic sites
mp_data <- data.scores.dplyr %>% filter(!row_number() %in% c(2,3, 4,7,10,12,13,14,17,20)) %>% arrange(match(Site,c("B1","B5","B7","B9","B9A"))) %>% relocate(Site, Year, Subwatershed) %>% mutate(TSS=wc_dplyr$TSS,TKN=wc_dplyr$TKN,TP=wc_dplyr$TP,
Cl=wc_dplyr$Cl,Ca=wc_dplyr$Ca,SC=wc_dplyr$SC)
#remove rows that contain data from benthic sites that do not match with water quality sites
#rearrange sites
#add columns of WC data using mutate function
#relocate Site, Year, Subwatershed columns to the front; you can use relocate(column a, .after=column b) => this will put column a after column b
Now that the data are ready to be used. The rest is easy and straighforward.
summary(lm(wc$Cl~wc$Ca))
##
## Call:
## lm(formula = wc$Cl ~ wc$Ca)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.9532 -9.6215 -0.9916 5.9425 21.9772
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.7734 5.4953 -1.415 0.188
## wc$Ca 3.9665 0.3166 12.527 1.95e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.39 on 10 degrees of freedom
## Multiple R-squared: 0.9401, Adjusted R-squared: 0.9341
## F-statistic: 156.9 on 1 and 10 DF, p-value: 1.949e-07
summary(lm(wc$Cl~wc$SC))
##
## Call:
## lm(formula = wc$Cl ~ wc$SC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.2534 -6.7331 -0.8353 9.1377 19.1564
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -92.96810 13.53246 -6.87 4.35e-05 ***
## wc$SC 0.40933 0.03835 10.67 8.73e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.38 on 10 degrees of freedom
## Multiple R-squared: 0.9193, Adjusted R-squared: 0.9112
## F-statistic: 113.9 on 1 and 10 DF, p-value: 8.726e-07
summary(lm(wc$SC~wc$Ca))
##
## Call:
## lm(formula = wc$SC ~ wc$Ca)
##
## Residuals:
## Min 1Q Median 3Q Max
## -61.05 -31.13 -13.86 29.73 72.73
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 220.028 20.970 10.493 1.02e-06 ***
## wc$Ca 8.788 1.208 7.273 2.69e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 47.26 on 10 degrees of freedom
## Multiple R-squared: 0.841, Adjusted R-squared: 0.8251
## F-statistic: 52.89 on 1 and 10 DF, p-value: 2.685e-05
summary(lm(wc$TKN~wc$TP))
##
## Call:
## lm(formula = wc$TKN ~ wc$TP)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4861 -0.2937 -0.0606 0.2326 0.9371
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1756 0.2756 0.637 0.5384
## wc$TP 26.0244 13.4070 1.941 0.0809 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4287 on 10 degrees of freedom
## Multiple R-squared: 0.2737, Adjusted R-squared: 0.201
## F-statistic: 3.768 on 1 and 10 DF, p-value: 0.08093
summary(lm(wc$TKN~wc$TSS))
##
## Call:
## lm(formula = wc$TKN ~ wc$TSS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5055 -0.2990 -0.1373 0.3313 1.0528
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.50478 0.28044 1.800 0.102
## wc$TSS 0.09228 0.14976 0.616 0.552
##
## Residual standard error: 0.4937 on 10 degrees of freedom
## Multiple R-squared: 0.03658, Adjusted R-squared: -0.05976
## F-statistic: 0.3797 on 1 and 10 DF, p-value: 0.5515
summary(lm(wc$TSS~wc$TP))
##
## Call:
## lm(formula = wc$TSS ~ wc$TP)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.21456 -0.47632 0.02498 0.11853 2.19952
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5230 0.5482 0.954 0.3626
## wc$TP 59.3297 26.6663 2.225 0.0503 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8526 on 10 degrees of freedom
## Multiple R-squared: 0.3311, Adjusted R-squared: 0.2642
## F-statistic: 4.95 on 1 and 10 DF, p-value: 0.05028
axis1_mp_salinity <- lm(data=mp_data,NMDS1~Cl+SC)
summary(axis1_mp_salinity)
##
## Call:
## lm(formula = NMDS1 ~ Cl + SC, data = mp_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.47515 -0.17302 -0.05439 0.17448 0.62048
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1306331 1.0505723 -0.124 0.905
## Cl 0.0088612 0.0114870 0.771 0.466
## SC -0.0009014 0.0047031 -0.192 0.853
##
## Residual standard error: 0.355 on 7 degrees of freedom
## Multiple R-squared: 0.5419, Adjusted R-squared: 0.4111
## F-statistic: 4.141 on 2 and 7 DF, p-value: 0.06504
axis1_mp_nutrient <- lm(data=mp_data,NMDS1~TKN+TP+TSS)
summary(axis1_mp_nutrient)
##
## Call:
## lm(formula = NMDS1 ~ TKN + TP + TSS, data = mp_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.44711 -0.22498 0.05318 0.18532 0.39612
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.4426 0.3012 -1.469 0.192
## TKN 0.2660 0.2741 0.970 0.369
## TP 35.1287 18.3836 1.911 0.105
## TSS -0.2363 0.1316 -1.796 0.123
##
## Residual standard error: 0.3316 on 6 degrees of freedom
## Multiple R-squared: 0.6575, Adjusted R-squared: 0.4862
## F-statistic: 3.839 on 3 and 6 DF, p-value: 0.07572
axis1_mp <- lm(data=mp_data, NMDS1~Cl+SC+Ca+TKN+TP+TSS)
summary(axis1_mp) #Ca is significant in this model
##
## Call:
## lm(formula = NMDS1 ~ Cl + SC + Ca + TKN + TP + TSS, data = mp_data)
##
## Residuals:
## 1 2 3 4 5 6 7 8
## 0.13140 0.14999 -0.18583 0.02095 0.07390 -0.05518 -0.11935 0.06019
## 9 10
## 0.04475 -0.12082
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.575290 1.339134 1.176 0.3243
## Cl 0.047446 0.017828 2.661 0.0763 .
## SC -0.006430 0.005313 -1.210 0.3129
## Ca -0.106351 0.031416 -3.385 0.0429 *
## TKN -0.567989 0.409654 -1.387 0.2597
## TP 30.328229 12.423306 2.441 0.0924 .
## TSS -0.183462 0.094892 -1.933 0.1487
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1981 on 3 degrees of freedom
## Multiple R-squared: 0.9389, Adjusted R-squared: 0.8166
## F-statistic: 7.681 on 6 and 3 DF, p-value: 0.06136
axis2_mp_salinity <- lm(data=mp_data,NMDS2~Cl+SC)
summary(axis2_mp_salinity)
##
## Call:
## lm(formula = NMDS2 ~ Cl + SC, data = mp_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.56505 -0.07617 -0.02667 0.07777 0.53247
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1313292 0.9438433 0.139 0.893
## Cl -0.0058160 0.0103200 -0.564 0.591
## SC 0.0007806 0.0042253 0.185 0.859
##
## Residual standard error: 0.3189 on 7 degrees of freedom
## Multiple R-squared: 0.3382, Adjusted R-squared: 0.1491
## F-statistic: 1.789 on 2 and 7 DF, p-value: 0.2358
axis2_mp_nutrient <- lm(data=mp_data,NMDS2~TKN+TP+TSS)
summary(axis2_mp_nutrient) #Intercept significant
##
## Call:
## lm(formula = NMDS2 ~ TKN + TP + TSS, data = mp_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.21581 -0.13799 -0.01294 0.06911 0.37054
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.84493 0.20818 4.059 0.00666 **
## TKN -0.07720 0.18946 -0.407 0.69780
## TP -21.88838 12.70743 -1.722 0.13576
## TSS -0.14582 0.09097 -1.603 0.16006
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2292 on 6 degrees of freedom
## Multiple R-squared: 0.707, Adjusted R-squared: 0.5605
## F-statistic: 4.827 on 3 and 6 DF, p-value: 0.04856
axis2_mp <- lm(data=mp_data, NMDS2~Cl+SC+Ca+TKN+TP+TSS)
summary(axis2_mp)
##
## Call:
## lm(formula = NMDS2 ~ Cl + SC + Ca + TKN + TP + TSS, data = mp_data)
##
## Residuals:
## 1 2 3 4 5 6 7 8
## -0.08545 -0.09228 0.02792 -0.10742 -0.16153 0.05909 0.29238 -0.03689
## 9 10
## -0.02093 0.12511
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.027282 1.564488 1.296 0.286
## Cl 0.010909 0.020828 0.524 0.637
## SC -0.004863 0.006208 -0.783 0.491
## Ca -0.005085 0.036702 -0.139 0.899
## TKN -0.157523 0.478592 -0.329 0.764
## TP -10.458560 14.513948 -0.721 0.523
## TSS -0.236207 0.110861 -2.131 0.123
##
## Residual standard error: 0.2314 on 3 degrees of freedom
## Multiple R-squared: 0.8507, Adjusted R-squared: 0.552
## F-statistic: 2.849 on 6 and 3 DF, p-value: 0.2096
I am going to examine the correlation between NMDS1/2 and one water chemistry variable at a time.
cor.test(mp_data$NMDS1,mp_data$SC,method="pearson")#significant
##
## Pearson's product-moment correlation
##
## data: mp_data$NMDS1 and mp_data$SC
## t = 2.8455, df = 8, p-value = 0.02163
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1438291 0.9255510
## sample estimates:
## cor
## 0.7092286
cor.test(mp_data$NMDS1,mp_data$Cl,method="pearson")#significant
##
## Pearson's product-moment correlation
##
## data: mp_data$NMDS1 and mp_data$Cl
## t = 3.0617, df = 8, p-value = 0.01554
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1951711 0.9327708
## sample estimates:
## cor
## 0.7345342
cor.test(mp_data$NMDS1,mp_data$Ca,method="pearson")
##
## Pearson's product-moment correlation
##
## data: mp_data$NMDS1 and mp_data$Ca
## t = 2.2441, df = 8, p-value = 0.05507
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.01328908 0.89925336
## sample estimates:
## cor
## 0.6215377
cor.test(mp_data$NMDS1,mp_data$TKN,method="pearson")
##
## Pearson's product-moment correlation
##
## data: mp_data$NMDS1 and mp_data$TKN
## t = 2.2367, df = 8, p-value = 0.05571
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.01533561 0.89886097
## sample estimates:
## cor
## 0.6202799
cor.test(mp_data$NMDS1,mp_data$TSS,method="pearson")
##
## Pearson's product-moment correlation
##
## data: mp_data$NMDS1 and mp_data$TSS
## t = -0.53188, df = 8, p-value = 0.6093
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7295449 0.5033925
## sample estimates:
## cor
## -0.1848087
cor.test(mp_data$NMDS1,mp_data$TP,method="pearson")
##
## Pearson's product-moment correlation
##
## data: mp_data$NMDS1 and mp_data$TP
## t = 2.1707, df = 8, p-value = 0.06175
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.03372718 0.89526763
## sample estimates:
## cor
## 0.6088279
cor.test(mp_data$NMDS2,mp_data$SC,method="pearson")
##
## Pearson's product-moment correlation
##
## data: mp_data$NMDS2 and mp_data$SC
## t = -1.8878, df = 8, p-value = 0.09574
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8779137 0.1144951
## sample estimates:
## cor
## -0.5551516
cor.test(mp_data$NMDS2,mp_data$Cl,method="pearson")
##
## Pearson's product-moment correlation
##
## data: mp_data$NMDS2 and mp_data$Cl
## t = -2.0075, df = 8, p-value = 0.07959
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.88566068 0.07999075
## sample estimates:
## cor
## -0.5787857
cor.test(mp_data$NMDS2,mp_data$Ca,method="pearson")
##
## Pearson's product-moment correlation
##
## data: mp_data$NMDS2 and mp_data$Ca
## t = -2.0776, df = 8, p-value = 0.07138
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8899172 0.0599934
## sample estimates:
## cor
## -0.5919946
cor.test(mp_data$NMDS2,mp_data$TKN,method="pearson")
##
## Pearson's product-moment correlation
##
## data: mp_data$NMDS2 and mp_data$TKN
## t = -1.5161, df = 8, p-value = 0.168
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8494018 0.2237425
## sample estimates:
## cor
## -0.4724382
cor.test(mp_data$NMDS2,mp_data$TSS,method="pearson")
##
## Pearson's product-moment correlation
##
## data: mp_data$NMDS2 and mp_data$TSS
## t = -2.2942, df = 8, p-value = 0.05093
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.9018636135 -0.0005221528
## sample estimates:
## cor
## -0.6299414
cor.test(mp_data$NMDS2,mp_data$TP,method="pearson") #significant
##
## Pearson's product-moment correlation
##
## data: mp_data$NMDS2 and mp_data$TP
## t = -3.3311, df = 8, p-value = 0.01037
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.9405077 -0.2550771
## sample estimates:
## cor
## -0.7622789