library(devtools)
devtools::source_gist("524eade46135f6348140", filename = "ggplot_smooth_func.R")
## Sourcing https://gist.githubusercontent.com/kdauria/524eade46135f6348140/raw/676acaca9a0a144ef320ae2ef00a31c3daa7179d/ggplot_smooth_func.R
## SHA-1 hash of file is c0b163b9fd2d7fe7bd5541e3266d8d36ff3b895d
Load the data
count <- plyr::count(df_final,"Scientific.name")
count$Scientific.name <- as.character(count$Scientific.name)
count <- count[which(count$freq>=15),] # get species which are presented in more than 15 times
count <- na.omit(count)
df <- df_final[which(df_final$Scientific.name %in% c(count$Scientific.name, "Engraulis ringens")),]
# anchovy is the most harvested species, so it is included in the analysis
df_cor <- df[,which(colnames(df_final) %in% c("SPPR", "TL", "TE", "PB", "PQ", "QB", "FCI", "NPP", "TST"))]
chart.Correlation(as.matrix(df_cor), histogram=TRUE, pch=19)
Table 1. 48 species which are represented at least 15 times as single species compartment or part of groups of species compartment
datatable(na.omit(count))
Make a regression of \(log_{10}(SPPR)\) vs the possible predictors
fit1 <- lm(SPPR~TL + PQ + QB + PB, data=df_final)
summary(fit1)
##
## Call:
## lm(formula = SPPR ~ TL + PQ + QB + PB, data = df_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0210 -0.3485 -0.0325 0.2942 3.9337
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.321534 0.055793 -5.763 8.68e-09 ***
## TL 1.007534 0.013632 73.910 < 2e-16 ***
## PQ -2.162989 0.095871 -22.562 < 2e-16 ***
## QB -0.015038 0.002564 -5.865 4.73e-09 ***
## PB 0.060722 0.010473 5.798 7.06e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5554 on 5929 degrees of freedom
## Multiple R-squared: 0.5482, Adjusted R-squared: 0.5479
## F-statistic: 1799 on 4 and 5929 DF, p-value: < 2.2e-16
vif(fit1)
## TL PQ QB PB
## 1.1447 1.7572 9.8515 10.0580
The result show that there is multicollinearity among predictors (as a rule of thumb \(\sqrt{VIF})>2\) indicates a multicollinearity problem), the predictor with highest VIF (variance inflation factor) is PB will be removed and model is fitted again
fit2 <- lm(SPPR~TL + PQ + QB, data=df_final)
summary(fit2)
##
## Call:
## lm(formula = SPPR ~ TL + PQ + QB, data = df_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9990 -0.3528 -0.0348 0.2903 3.9306
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.439523 0.052092 -8.437 <2e-16 ***
## TL 1.015656 0.013597 74.697 <2e-16 ***
## PQ -1.798989 0.072653 -24.761 <2e-16 ***
## QB -0.001054 0.000872 -1.209 0.227
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.557 on 5930 degrees of freedom
## Multiple R-squared: 0.5457, Adjusted R-squared: 0.5454
## F-statistic: 2374 on 3 and 5930 DF, p-value: < 2.2e-16
vif(fit2)
## TL PQ QB
## 1.1326 1.0036 1.1334
The influence of PB on \(\log_{10}(SPPR)\) is not significant, hence this predictor is consequently removed.
fit3 <- lm(SPPR~TL + PQ, data=df_final)
summary(fit3)
##
## Call:
## lm(formula = SPPR ~ TL + PQ, data = df_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9992 -0.3541 -0.0311 0.2911 3.9327
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.46737 0.04672 -10.00 <2e-16 ***
## TL 1.02126 0.01278 79.90 <2e-16 ***
## PQ -1.79440 0.07256 -24.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.557 on 5931 degrees of freedom
## Multiple R-squared: 0.5455, Adjusted R-squared: 0.5454
## F-statistic: 3560 on 2 and 5931 DF, p-value: < 2.2e-16
vif(fit3)
## TL PQ
## 1.0009 1.0009
relImportance<- calc.relimp(fit3, type = "lmg", rela = TRUE)
relImportance$lmg # relative impportant of the predictors
## TL PQ
## 0.90534538 0.09465462
anova(fit3, fit2) # compare two models
## Analysis of Variance Table
##
## Model 1: SPPR ~ TL + PQ
## Model 2: SPPR ~ TL + PQ + QB
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 5931 1840.1
## 2 5930 1839.6 1 0.45311 1.4606 0.2269
The result indicates that the \(\log_{10}(SPPR)\) is positively correlated with TL and negatively correlated with gross growth efficiency (PQ). However, the TL is much more important predictor compared with PQ. TL explains more than 90% of \(R^2\). Also, ANOVA test is non-significant, we can conclude that the PQ does not add to linear prediction and we can drop it from the model.
fit4 <- lm(SPPR~TL, data=df_final)
summary(fit4)
##
## Call:
## lm(formula = SPPR ~ TL, data = df_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8888 -0.3856 -0.0013 0.3203 4.1113
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.82396 0.04667 -17.66 <2e-16 ***
## TL 1.03072 0.01342 76.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.585 on 5932 degrees of freedom
## Multiple R-squared: 0.4987, Adjusted R-squared: 0.4986
## F-statistic: 5901 on 1 and 5932 DF, p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(fit4)
ncvTest(fit4) # test for constant variance
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 105.4612 Df = 1 p = 9.67755e-25
library(nortest)
library(gvlma)
ad.test(fit4$residuals) # test for normality of residuals
##
## Anderson-Darling normality test
##
## data: fit4$residuals
## A = 26.215, p-value < 2.2e-16
summary(gvlma(fit4)) # global test from gvlma packages
##
## Call:
## lm(formula = SPPR ~ TL, data = df_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8888 -0.3856 -0.0013 0.3203 4.1113
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.82396 0.04667 -17.66 <2e-16 ***
## TL 1.03072 0.01342 76.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.585 on 5932 degrees of freedom
## Multiple R-squared: 0.4987, Adjusted R-squared: 0.4986
## F-statistic: 5901 on 1 and 5932 DF, p-value: < 2.2e-16
##
##
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance = 0.05
##
## Call:
## gvlma(x = fit4)
##
## Value p-value Decision
## Global Stat 3719.071 0.000000 Assumptions NOT satisfied!
## Skewness 560.490 0.000000 Assumptions NOT satisfied!
## Kurtosis 3066.892 0.000000 Assumptions NOT satisfied!
## Link Function 7.522 0.006094 Assumptions NOT satisfied!
## Heteroscedasticity 84.167 0.000000 Assumptions NOT satisfied!
The graphical exploration for assumption diagnostic for linear regression is quite ok with constant variance and the residuals are more or less normally distributed. However, the formal tests indicate that the variance is not constant and the residuals do not follow normal distribution.
Check if \(\log_{10}(SPPR)\) depends on Species
fit5 <- lm(SPPR~TL+Species, data=df)
summary(fit5)
##
## Call:
## lm(formula = SPPR ~ TL + Species, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.32003 -0.27160 -0.01825 0.26055 1.93057
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.60232 0.21149 7.576 8.12e-14 ***
## TL 0.55001 0.04269 12.883 < 2e-16 ***
## SpeciesAmerican angler -0.42654 0.14869 -2.869 0.004210 **
## SpeciesAmerican plaice -0.78144 0.14625 -5.343 1.13e-07 ***
## SpeciesAnchovy -1.26711 0.19816 -6.394 2.47e-10 ***
## SpeciesAtlantic argentine -1.18959 0.16611 -7.161 1.55e-12 ***
## SpeciesAtlantic bonito -0.37137 0.15052 -2.467 0.013785 *
## SpeciesAtlantic cod -0.90443 0.13108 -6.900 9.25e-12 ***
## SpeciesAtlantic halibut -0.38996 0.14384 -2.711 0.006822 **
## SpeciesAtlantic horse mackerel -1.09607 0.14775 -7.419 2.53e-13 ***
## SpeciesAtlantic mackerel -0.96028 0.13677 -7.021 4.07e-12 ***
## SpeciesAtlantic saury -1.23509 0.16746 -7.376 3.44e-13 ***
## SpeciesBig-eye tuna 0.04975 0.16002 0.311 0.755960
## SpeciesBluefin tuna -0.49489 0.15176 -3.261 0.001148 **
## SpeciesBluefish -0.54002 0.15362 -3.515 0.000459 ***
## SpeciesCapelin -1.54651 0.15138 -10.216 < 2e-16 ***
## SpeciesChub mackerel -0.85594 0.13859 -6.176 9.56e-10 ***
## SpeciesCommon sole -0.98641 0.16403 -6.014 2.55e-09 ***
## SpeciesEuropean anchovy -1.18478 0.16507 -7.177 1.39e-12 ***
## SpeciesEuropean hake -0.59983 0.15995 -3.750 0.000187 ***
## SpeciesEuropean pilchard -1.34525 0.16856 -7.981 3.99e-15 ***
## SpeciesFlathead grey mullet -1.27206 0.16394 -7.759 2.11e-14 ***
## SpeciesGilthead seabream -0.80015 0.16008 -4.998 6.83e-07 ***
## SpeciesGreenland halibut -0.76129 0.13786 -5.522 4.27e-08 ***
## SpeciesHaddock -0.92694 0.14092 -6.578 7.70e-11 ***
## SpeciesHerring -1.32887 0.14166 -9.381 < 2e-16 ***
## SpeciesLargehead hairtail -0.36409 0.15781 -2.307 0.021254 *
## SpeciesNorthern shrimp -1.29507 0.15890 -8.150 1.09e-15 ***
## SpeciesOctopus -1.13142 0.16570 -6.828 1.49e-11 ***
## SpeciesPicked dogfish -0.42325 0.14317 -2.956 0.003188 **
## SpeciesRound sardinella -1.17754 0.16624 -7.083 2.66e-12 ***
## SpeciesRoundnose grenadier -0.49196 0.16156 -3.045 0.002388 **
## SpeciesSaithe -0.74168 0.13799 -5.375 9.56e-08 ***
## SpeciesShort-finned squid -1.08873 0.16111 -6.758 2.38e-11 ***
## SpeciesSilver hake -0.56773 0.14632 -3.880 0.000111 ***
## SpeciesSkipjack tuna -0.09633 0.15769 -0.611 0.541446
## SpeciesSnow crab -0.67279 0.17077 -3.940 8.72e-05 ***
## SpeciesSouth American pilchard -1.26597 0.16827 -7.524 1.19e-13 ***
## SpeciesSpotted wolffish -0.72321 0.15634 -4.626 4.22e-06 ***
## SpeciesSprat -1.37003 0.16308 -8.401 < 2e-16 ***
## SpeciesSwordfish -0.27580 0.14865 -1.855 0.063833 .
## SpeciesThorny skate -0.35672 0.15084 -2.365 0.018229 *
## SpeciesTusk -0.45148 0.14872 -3.036 0.002462 **
## SpeciesWhite hake -0.43499 0.14874 -2.924 0.003529 **
## SpeciesWhiting -0.87846 0.15589 -5.635 2.28e-08 ***
## SpeciesWinter skate -0.38326 0.16327 -2.347 0.019099 *
## SpeciesWitch flounder -1.04347 0.15985 -6.528 1.06e-10 ***
## SpeciesWolffish -0.69143 0.14217 -4.863 1.34e-06 ***
## SpeciesYellowfin tuna -0.09549 0.15178 -0.629 0.529417
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4524 on 995 degrees of freedom
## Multiple R-squared: 0.6564, Adjusted R-squared: 0.6398
## F-statistic: 39.6 on 48 and 995 DF, p-value: < 2.2e-16
fit6 <- lm(SPPR~TL*Species, data=df) #model with interaction of TL and Species.
summary(fit6)
##
## Call:
## lm(formula = SPPR ~ TL * Species, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.32447 -0.27273 -0.02421 0.24844 1.99984
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.563550 1.625937 0.962 0.3365
## TL 0.559272 0.387498 1.443 0.1493
## SpeciesAmerican angler 0.510428 2.085066 0.245 0.8067
## SpeciesAmerican plaice -0.793993 1.846523 -0.430 0.6673
## SpeciesAnchovy -0.589735 1.869196 -0.316 0.7524
## SpeciesAtlantic argentine -2.613143 2.479822 -1.054 0.2923
## SpeciesAtlantic bonito 3.418762 2.049667 1.668 0.0957 .
## SpeciesAtlantic cod -1.827110 1.796239 -1.017 0.3093
## SpeciesAtlantic halibut -0.443077 1.966010 -0.225 0.8217
## SpeciesAtlantic horse mackerel -0.399248 1.935658 -0.206 0.8366
## SpeciesAtlantic mackerel -1.039039 1.768605 -0.587 0.5570
## SpeciesAtlantic saury -3.724290 2.681409 -1.389 0.1652
## SpeciesBig-eye tuna -2.762659 2.046070 -1.350 0.1773
## SpeciesBluefin tuna -1.707088 2.109686 -0.809 0.4186
## SpeciesBluefish -1.493125 2.411879 -0.619 0.5360
## SpeciesCapelin -2.427164 2.257978 -1.075 0.2827
## SpeciesChub mackerel -0.297456 1.756224 -0.169 0.8655
## SpeciesCommon sole 0.113689 2.077199 0.055 0.9564
## SpeciesEuropean anchovy -0.608493 1.880210 -0.324 0.7463
## SpeciesEuropean hake 0.550080 2.087396 0.264 0.7922
## SpeciesEuropean pilchard -1.277782 2.141420 -0.597 0.5509
## SpeciesFlathead grey mullet 0.219181 1.705438 0.129 0.8978
## SpeciesGilthead seabream -3.435381 2.211457 -1.553 0.1207
## SpeciesGreenland halibut -1.580903 1.921165 -0.823 0.4108
## SpeciesHaddock 0.542195 1.830017 0.296 0.7671
## SpeciesHerring -2.390768 1.816886 -1.316 0.1885
## SpeciesLargehead hairtail -2.587772 1.883819 -1.374 0.1699
## SpeciesNorthern shrimp -1.817215 1.975532 -0.920 0.3579
## SpeciesOctopus -1.490679 2.274483 -0.655 0.5124
## SpeciesPicked dogfish 1.984094 1.924676 1.031 0.3029
## SpeciesRound sardinella -0.873922 1.935608 -0.451 0.6517
## SpeciesRoundnose grenadier 0.779836 2.180094 0.358 0.7206
## SpeciesSaithe -0.235999 1.819645 -0.130 0.8968
## SpeciesShort-finned squid -1.350383 2.087658 -0.647 0.5179
## SpeciesSilver hake -1.697932 2.013452 -0.843 0.3993
## SpeciesSkipjack tuna -0.667965 2.060299 -0.324 0.7459
## SpeciesSnow crab -10.029496 3.971369 -2.525 0.0117 *
## SpeciesSouth American pilchard 0.255210 1.958471 0.130 0.8963
## SpeciesSpotted wolffish -0.116712 2.051217 -0.057 0.9546
## SpeciesSprat -1.705181 1.965622 -0.868 0.3859
## SpeciesSwordfish -0.518426 2.220139 -0.234 0.8154
## SpeciesThorny skate -0.898912 1.902462 -0.472 0.6367
## SpeciesTusk -0.861566 1.991195 -0.433 0.6653
## SpeciesWhite hake -0.759902 1.993745 -0.381 0.7032
## SpeciesWhiting -1.155420 1.887447 -0.612 0.5406
## SpeciesWinter skate -2.751121 2.200808 -1.250 0.2116
## SpeciesWitch flounder -1.338084 1.889450 -0.708 0.4790
## SpeciesWolffish 0.295527 1.870301 0.158 0.8745
## SpeciesYellowfin tuna -1.393145 1.987708 -0.701 0.4835
## TL:SpeciesAmerican angler -0.228535 0.501155 -0.456 0.6485
## TL:SpeciesAmerican plaice 0.005171 0.458397 0.011 0.9910
## TL:SpeciesAnchovy -0.278649 0.545790 -0.511 0.6098
## TL:SpeciesAtlantic argentine 0.422207 0.673947 0.626 0.5312
## TL:SpeciesAtlantic bonito -0.963525 0.500318 -1.926 0.0544 .
## TL:SpeciesAtlantic cod 0.235803 0.433299 0.544 0.5864
## TL:SpeciesAtlantic halibut 0.013333 0.472811 0.028 0.9775
## TL:SpeciesAtlantic horse mackerel -0.197115 0.489283 -0.403 0.6871
## TL:SpeciesAtlantic mackerel 0.022954 0.431447 0.053 0.9576
## TL:SpeciesAtlantic saury 0.769124 0.761520 1.010 0.3128
## TL:SpeciesBig-eye tuna 0.689915 0.492112 1.402 0.1613
## TL:SpeciesBluefin tuna 0.293883 0.505653 0.581 0.5612
## TL:SpeciesBluefish 0.235789 0.585844 0.402 0.6874
## TL:SpeciesCapelin 0.273621 0.617854 0.443 0.6580
## TL:SpeciesChub mackerel -0.162760 0.433758 -0.375 0.7076
## TL:SpeciesCommon sole -0.327563 0.547115 -0.599 0.5495
## TL:SpeciesEuropean anchovy -0.186410 0.495690 -0.376 0.7070
## TL:SpeciesEuropean hake -0.274518 0.497087 -0.552 0.5809
## TL:SpeciesEuropean pilchard -0.018388 0.587547 -0.031 0.9750
## TL:SpeciesFlathead grey mullet -0.498969 0.423079 -1.179 0.2385
## TL:SpeciesGilthead seabream 0.803510 0.597252 1.345 0.1788
## TL:SpeciesGreenland halibut 0.199774 0.460323 0.434 0.6644
## TL:SpeciesHaddock -0.408811 0.452436 -0.904 0.3664
## TL:SpeciesHerring 0.318680 0.456060 0.699 0.4849
## TL:SpeciesLargehead hairtail 0.558443 0.454314 1.229 0.2193
## TL:SpeciesNorthern shrimp 0.206502 0.579062 0.357 0.7215
## TL:SpeciesOctopus 0.106505 0.602503 0.177 0.8597
## TL:SpeciesPicked dogfish -0.613108 0.467561 -1.311 0.1901
## TL:SpeciesRound sardinella -0.099173 0.525266 -0.189 0.8503
## TL:SpeciesRoundnose grenadier -0.346806 0.554325 -0.626 0.5317
## TL:SpeciesSaithe -0.125324 0.437059 -0.287 0.7744
## TL:SpeciesShort-finned squid 0.071192 0.521752 0.136 0.8915
## TL:SpeciesSilver hake 0.285586 0.489231 0.584 0.5595
## TL:SpeciesSkipjack tuna 0.141643 0.497286 0.285 0.7758
## TL:SpeciesSnow crab 3.161116 1.281970 2.466 0.0138 *
## TL:SpeciesSouth American pilchard -0.537984 0.547980 -0.982 0.3265
## TL:SpeciesSpotted wolffish -0.171816 0.526778 -0.326 0.7444
## TL:SpeciesSprat 0.107489 0.517631 0.208 0.8355
## TL:SpeciesSwordfish 0.058153 0.529903 0.110 0.9126
## TL:SpeciesThorny skate 0.141926 0.464312 0.306 0.7599
## TL:SpeciesTusk 0.100930 0.478782 0.211 0.8331
## TL:SpeciesWhite hake 0.080324 0.479984 0.167 0.8671
## TL:SpeciesWhiting 0.071481 0.457727 0.156 0.8759
## TL:SpeciesWinter skate 0.617687 0.546391 1.130 0.2586
## TL:SpeciesWitch flounder 0.091379 0.483279 0.189 0.8501
## TL:SpeciesWolffish -0.278931 0.467694 -0.596 0.5511
## TL:SpeciesYellowfin tuna 0.316481 0.476684 0.664 0.5069
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4475 on 948 degrees of freedom
## Multiple R-squared: 0.6797, Adjusted R-squared: 0.6476
## F-statistic: 21.18 on 95 and 948 DF, p-value: < 2.2e-16
The results from model 6 (fit6) indicates that the effect of TL on SPPR does not depend on species, meaning that the increase/decrease in TL by 1 unit will lead to the same changes in \(\log_{10}(SPPR)\) for all species.
chart.Correlation(as.matrix(dt_cor), histogram=TRUE, pch=19)
fit7 <- lm(MeanSPPR~MeanTL+MeanPB+MeanQB, data=dt)
The MeanPB and MeanQB are multicolinearity VIF>4, hence, the predictors with highest VIF (MeanQB) are eliminated the model is then fitted again
fit8 <- lm(MeanSPPR~MeanTL+MeanPB, data=dt)
fit9 <- lm(MeanSPPR~MeanTL, data=dt)
AIC(fit8, fit9)
## df AIC
## fit8 4 10.644010
## fit9 3 8.698002
anova(fit9, fit8)
## Analysis of Variance Table
##
## Model 1: MeanSPPR ~ MeanTL
## Model 2: MeanSPPR ~ MeanTL + MeanPB
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 46 2.9729
## 2 45 2.9695 1 0.0033421 0.0506 0.823
AIC of model 8 is lower, and the anova test if non-significant indicate that the MeanPB can be dropt from the model. Hence, the \(\log_{10}(\pi_{SPPR})\) is only dependent on meanTL of the species.
Draw plot for regression of mean(log10(SPPR)) and log10(SPPR) vs TL
p3 <- ggplot(data=df, aes(x=TL, y=SPPR, colour=Species))+
geom_point()+
scale_colour_discrete(guide=FALSE)+
geom_smooth(method="lm", se=FALSE)+
facet_wrap(~Species, nrow=8)+
stat_smooth_func(geom="text",method="lm",hjust=0, vjust=-1,parse=TRUE, xpos=2, ypos=3)+
ylab(expression(log[10](SPPR)))
p3
Figure 3. Correlation of log10(SPPR) vs TL for 48 main species which are most represented in the food web models.
library(gvlma)
fit <- lm(MeanSPPR~MeanTL, data=dt)
fit1 <- lm(MeanSPPR~MeanTL + MeanPB , data=dt)
AIC(fit, fit1)
## df AIC
## fit 3 8.698002
## fit1 4 10.644010
summary(gvlma(fit1))
##
## Call:
## lm(formula = MeanSPPR ~ MeanTL + MeanPB, data = dt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51678 -0.21645 0.00526 0.16354 0.54309
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.61502 0.43662 -3.699 0.000587 ***
## MeanTL 1.23091 0.10482 11.743 2.68e-15 ***
## MeanPB -0.02049 0.09106 -0.225 0.822962
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2569 on 45 degrees of freedom
## Multiple R-squared: 0.8409, Adjusted R-squared: 0.8338
## F-statistic: 118.9 on 2 and 45 DF, p-value: < 2.2e-16
##
##
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance = 0.05
##
## Call:
## gvlma(x = fit1)
##
## Value p-value Decision
## Global Stat 9.4298 0.051209 Assumptions acceptable.
## Skewness 0.1301 0.718339 Assumptions acceptable.
## Kurtosis 0.7828 0.376272 Assumptions acceptable.
## Link Function 8.0912 0.004448 Assumptions NOT satisfied!
## Heteroscedasticity 0.4257 0.514111 Assumptions acceptable.
par(mfrow=c(2,2))
plot(fit)
Check the assumption of linear regression by formal test
# normal distribution of redisudal using shapiro.test
shapiro.test(fit$residuals)
##
## Shapiro-Wilk normality test
##
## data: fit$residuals
## W = 0.98293, p-value = 0.7037
# equal variance by ncvTest
ncvTest(fit)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.1315153 Df = 1 p = 0.7168661
From these test the normality and constant variance assumptions are satisfied.
df_fishbase <- read.xlsx(file="fishbase.xlsx", 1)
colnames(df_fishbase)[which(colnames(df_fishbase)=="TL")] <- "MeanTL"
fitted <- predict(fit, newdata=df_fishbase, interval = "prediction")
df_fishbase<-cbind(df_fishbase, fitted)
df_fishbase$empirical.equation <- df_fishbase$MeanTL-1 #Assuming 10% of TE
df_fishbase <- cbind(df_fishbase[,1:3], round(df_fishbase[,4:ncol(df_fishbase)], digits=2))
datatable(df_fishbase)
ggplot(data=df_fishbase, aes(x=MeanTL, y=fit))+
geom_line(colour="blue")+
geom_line(aes(y=lwr), linetype="dashed", colour="blue")+
geom_line(aes(y=upr), linetype="dashed", colour="blue")+
geom_point(aes(y=empirical.equation, colour="red"))+
scale_colour_discrete("", labels="SPPR from empirical equation")+
geom_vline(xintercept = 4.1)