Make a regression based on average values
Model diagnostic for \(\log_{10}(\pi_{SPPR})\) vs MeanTL
Comparison with empirical approach

library(devtools)
devtools::source_gist("524eade46135f6348140", filename = "ggplot_smooth_func.R")

## Sourcing https://gist.githubusercontent.com/kdauria/524eade46135f6348140/raw/676acaca9a0a144ef320ae2ef00a31c3daa7179d/ggplot_smooth_func.R

## SHA-1 hash of file is c0b163b9fd2d7fe7bd5541e3266d8d36ff3b895d

Load the data

count <- plyr::count(df_final,"Scientific.name")
count$Scientific.name <- as.character(count$Scientific.name)
count <- count[which(count$freq>=15),]             # get species which are presented in more than 15 times
count <- na.omit(count)
df <- df_final[which(df_final$Scientific.name %in% c(count$Scientific.name, "Engraulis ringens")),]
# anchovy is the most harvested species, so it is included in the analysis
df_cor <- df[,which(colnames(df_final) %in% c("SPPR", "TL", "TE", "PB", "PQ", "QB", "FCI", "NPP", "TST"))]

chart.Correlation(as.matrix(df_cor), histogram=TRUE, pch=19)

Table 1. 48 species which are represented at least 15 times as single species compartment or part of groups of species compartment

datatable(na.omit(count))

Make a regression of \(log_{10}(SPPR)\) vs the possible predictors

fit1 <- lm(SPPR~TL + PQ + QB + PB, data=df_final)
summary(fit1)

## 
## Call:
## lm(formula = SPPR ~ TL + PQ + QB + PB, data = df_final)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0210 -0.3485 -0.0325  0.2942  3.9337 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.321534   0.055793  -5.763 8.68e-09 ***
## TL           1.007534   0.013632  73.910  < 2e-16 ***
## PQ          -2.162989   0.095871 -22.562  < 2e-16 ***
## QB          -0.015038   0.002564  -5.865 4.73e-09 ***
## PB           0.060722   0.010473   5.798 7.06e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5554 on 5929 degrees of freedom
## Multiple R-squared:  0.5482, Adjusted R-squared:  0.5479 
## F-statistic:  1799 on 4 and 5929 DF,  p-value: < 2.2e-16

vif(fit1)

##      TL      PQ      QB      PB 
##  1.1447  1.7572  9.8515 10.0580

The result show that there is multicollinearity among predictors (as a rule of thumb \(\sqrt{VIF})>2\) indicates a multicollinearity problem), the predictor with highest VIF (variance inflation factor) is PB will be removed and model is fitted again

fit2 <- lm(SPPR~TL + PQ + QB, data=df_final)
summary(fit2)

## 
## Call:
## lm(formula = SPPR ~ TL + PQ + QB, data = df_final)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9990 -0.3528 -0.0348  0.2903  3.9306 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.439523   0.052092  -8.437   <2e-16 ***
## TL           1.015656   0.013597  74.697   <2e-16 ***
## PQ          -1.798989   0.072653 -24.761   <2e-16 ***
## QB          -0.001054   0.000872  -1.209    0.227    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.557 on 5930 degrees of freedom
## Multiple R-squared:  0.5457, Adjusted R-squared:  0.5454 
## F-statistic:  2374 on 3 and 5930 DF,  p-value: < 2.2e-16

vif(fit2)

##     TL     PQ     QB 
## 1.1326 1.0036 1.1334

The influence of PB on \(\log_{10}(SPPR)\) is not significant, hence this predictor is consequently removed.

fit3 <- lm(SPPR~TL + PQ, data=df_final)
summary(fit3)

## 
## Call:
## lm(formula = SPPR ~ TL + PQ, data = df_final)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9992 -0.3541 -0.0311  0.2911  3.9327 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.46737    0.04672  -10.00   <2e-16 ***
## TL           1.02126    0.01278   79.90   <2e-16 ***
## PQ          -1.79440    0.07256  -24.73   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.557 on 5931 degrees of freedom
## Multiple R-squared:  0.5455, Adjusted R-squared:  0.5454 
## F-statistic:  3560 on 2 and 5931 DF,  p-value: < 2.2e-16

vif(fit3)

##     TL     PQ 
## 1.0009 1.0009

relImportance<- calc.relimp(fit3, type = "lmg", rela = TRUE)
relImportance$lmg     # relative impportant of the predictors

##         TL         PQ 
## 0.90534538 0.09465462

anova(fit3, fit2)     # compare two models

## Analysis of Variance Table
## 
## Model 1: SPPR ~ TL + PQ
## Model 2: SPPR ~ TL + PQ + QB
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1   5931 1840.1                           
## 2   5930 1839.6  1   0.45311 1.4606 0.2269

The result indicates that the \(\log_{10}(SPPR)\) is positively correlated with TL and negatively correlated with gross growth efficiency (PQ). However, the TL is much more important predictor compared with PQ. TL explains more than 90% of \(R^2\). Also, ANOVA test is non-significant, we can conclude that the PQ does not add to linear prediction and we can drop it from the model.

fit4 <- lm(SPPR~TL, data=df_final)
summary(fit4)

## 
## Call:
## lm(formula = SPPR ~ TL, data = df_final)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8888 -0.3856 -0.0013  0.3203  4.1113 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.82396    0.04667  -17.66   <2e-16 ***
## TL           1.03072    0.01342   76.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.585 on 5932 degrees of freedom
## Multiple R-squared:  0.4987, Adjusted R-squared:  0.4986 
## F-statistic:  5901 on 1 and 5932 DF,  p-value: < 2.2e-16

par(mfrow=c(2,2))
plot(fit4)

ncvTest(fit4)              # test for constant variance

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 105.4612    Df = 1     p = 9.67755e-25

library(nortest)
library(gvlma)
ad.test(fit4$residuals)   # test for normality of residuals

## 
##  Anderson-Darling normality test
## 
## data:  fit4$residuals
## A = 26.215, p-value < 2.2e-16

summary(gvlma(fit4))      # global test from gvlma packages

## 
## Call:
## lm(formula = SPPR ~ TL, data = df_final)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8888 -0.3856 -0.0013  0.3203  4.1113 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.82396    0.04667  -17.66   <2e-16 ***
## TL           1.03072    0.01342   76.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.585 on 5932 degrees of freedom
## Multiple R-squared:  0.4987, Adjusted R-squared:  0.4986 
## F-statistic:  5901 on 1 and 5932 DF,  p-value: < 2.2e-16
## 
## 
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance =  0.05 
## 
## Call:
##  gvlma(x = fit4) 
## 
##                       Value  p-value                   Decision
## Global Stat        3719.071 0.000000 Assumptions NOT satisfied!
## Skewness            560.490 0.000000 Assumptions NOT satisfied!
## Kurtosis           3066.892 0.000000 Assumptions NOT satisfied!
## Link Function         7.522 0.006094 Assumptions NOT satisfied!
## Heteroscedasticity   84.167 0.000000 Assumptions NOT satisfied!

The graphical exploration for assumption diagnostic for linear regression is quite ok with constant variance and the residuals are more or less normally distributed. However, the formal tests indicate that the variance is not constant and the residuals do not follow normal distribution.

Check if \(\log_{10}(SPPR)\) depends on Species

fit5 <- lm(SPPR~TL+Species, data=df)
summary(fit5)

## 
## Call:
## lm(formula = SPPR ~ TL + Species, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.32003 -0.27160 -0.01825  0.26055  1.93057 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     1.60232    0.21149   7.576 8.12e-14 ***
## TL                              0.55001    0.04269  12.883  < 2e-16 ***
## SpeciesAmerican angler         -0.42654    0.14869  -2.869 0.004210 ** 
## SpeciesAmerican plaice         -0.78144    0.14625  -5.343 1.13e-07 ***
## SpeciesAnchovy                 -1.26711    0.19816  -6.394 2.47e-10 ***
## SpeciesAtlantic argentine      -1.18959    0.16611  -7.161 1.55e-12 ***
## SpeciesAtlantic bonito         -0.37137    0.15052  -2.467 0.013785 *  
## SpeciesAtlantic cod            -0.90443    0.13108  -6.900 9.25e-12 ***
## SpeciesAtlantic halibut        -0.38996    0.14384  -2.711 0.006822 ** 
## SpeciesAtlantic horse mackerel -1.09607    0.14775  -7.419 2.53e-13 ***
## SpeciesAtlantic mackerel       -0.96028    0.13677  -7.021 4.07e-12 ***
## SpeciesAtlantic saury          -1.23509    0.16746  -7.376 3.44e-13 ***
## SpeciesBig-eye tuna             0.04975    0.16002   0.311 0.755960    
## SpeciesBluefin tuna            -0.49489    0.15176  -3.261 0.001148 ** 
## SpeciesBluefish                -0.54002    0.15362  -3.515 0.000459 ***
## SpeciesCapelin                 -1.54651    0.15138 -10.216  < 2e-16 ***
## SpeciesChub mackerel           -0.85594    0.13859  -6.176 9.56e-10 ***
## SpeciesCommon sole             -0.98641    0.16403  -6.014 2.55e-09 ***
## SpeciesEuropean anchovy        -1.18478    0.16507  -7.177 1.39e-12 ***
## SpeciesEuropean hake           -0.59983    0.15995  -3.750 0.000187 ***
## SpeciesEuropean pilchard       -1.34525    0.16856  -7.981 3.99e-15 ***
## SpeciesFlathead grey mullet    -1.27206    0.16394  -7.759 2.11e-14 ***
## SpeciesGilthead seabream       -0.80015    0.16008  -4.998 6.83e-07 ***
## SpeciesGreenland halibut       -0.76129    0.13786  -5.522 4.27e-08 ***
## SpeciesHaddock                 -0.92694    0.14092  -6.578 7.70e-11 ***
## SpeciesHerring                 -1.32887    0.14166  -9.381  < 2e-16 ***
## SpeciesLargehead hairtail      -0.36409    0.15781  -2.307 0.021254 *  
## SpeciesNorthern shrimp         -1.29507    0.15890  -8.150 1.09e-15 ***
## SpeciesOctopus                 -1.13142    0.16570  -6.828 1.49e-11 ***
## SpeciesPicked dogfish          -0.42325    0.14317  -2.956 0.003188 ** 
## SpeciesRound sardinella        -1.17754    0.16624  -7.083 2.66e-12 ***
## SpeciesRoundnose grenadier     -0.49196    0.16156  -3.045 0.002388 ** 
## SpeciesSaithe                  -0.74168    0.13799  -5.375 9.56e-08 ***
## SpeciesShort-finned squid      -1.08873    0.16111  -6.758 2.38e-11 ***
## SpeciesSilver hake             -0.56773    0.14632  -3.880 0.000111 ***
## SpeciesSkipjack tuna           -0.09633    0.15769  -0.611 0.541446    
## SpeciesSnow crab               -0.67279    0.17077  -3.940 8.72e-05 ***
## SpeciesSouth American pilchard -1.26597    0.16827  -7.524 1.19e-13 ***
## SpeciesSpotted wolffish        -0.72321    0.15634  -4.626 4.22e-06 ***
## SpeciesSprat                   -1.37003    0.16308  -8.401  < 2e-16 ***
## SpeciesSwordfish               -0.27580    0.14865  -1.855 0.063833 .  
## SpeciesThorny skate            -0.35672    0.15084  -2.365 0.018229 *  
## SpeciesTusk                    -0.45148    0.14872  -3.036 0.002462 ** 
## SpeciesWhite hake              -0.43499    0.14874  -2.924 0.003529 ** 
## SpeciesWhiting                 -0.87846    0.15589  -5.635 2.28e-08 ***
## SpeciesWinter skate            -0.38326    0.16327  -2.347 0.019099 *  
## SpeciesWitch flounder          -1.04347    0.15985  -6.528 1.06e-10 ***
## SpeciesWolffish                -0.69143    0.14217  -4.863 1.34e-06 ***
## SpeciesYellowfin tuna          -0.09549    0.15178  -0.629 0.529417    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4524 on 995 degrees of freedom
## Multiple R-squared:  0.6564, Adjusted R-squared:  0.6398 
## F-statistic:  39.6 on 48 and 995 DF,  p-value: < 2.2e-16

fit6 <- lm(SPPR~TL*Species, data=df) #model with interaction of TL and Species.
summary(fit6)

## 
## Call:
## lm(formula = SPPR ~ TL * Species, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.32447 -0.27273 -0.02421  0.24844  1.99984 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                         1.563550   1.625937   0.962   0.3365  
## TL                                  0.559272   0.387498   1.443   0.1493  
## SpeciesAmerican angler              0.510428   2.085066   0.245   0.8067  
## SpeciesAmerican plaice             -0.793993   1.846523  -0.430   0.6673  
## SpeciesAnchovy                     -0.589735   1.869196  -0.316   0.7524  
## SpeciesAtlantic argentine          -2.613143   2.479822  -1.054   0.2923  
## SpeciesAtlantic bonito              3.418762   2.049667   1.668   0.0957 .
## SpeciesAtlantic cod                -1.827110   1.796239  -1.017   0.3093  
## SpeciesAtlantic halibut            -0.443077   1.966010  -0.225   0.8217  
## SpeciesAtlantic horse mackerel     -0.399248   1.935658  -0.206   0.8366  
## SpeciesAtlantic mackerel           -1.039039   1.768605  -0.587   0.5570  
## SpeciesAtlantic saury              -3.724290   2.681409  -1.389   0.1652  
## SpeciesBig-eye tuna                -2.762659   2.046070  -1.350   0.1773  
## SpeciesBluefin tuna                -1.707088   2.109686  -0.809   0.4186  
## SpeciesBluefish                    -1.493125   2.411879  -0.619   0.5360  
## SpeciesCapelin                     -2.427164   2.257978  -1.075   0.2827  
## SpeciesChub mackerel               -0.297456   1.756224  -0.169   0.8655  
## SpeciesCommon sole                  0.113689   2.077199   0.055   0.9564  
## SpeciesEuropean anchovy            -0.608493   1.880210  -0.324   0.7463  
## SpeciesEuropean hake                0.550080   2.087396   0.264   0.7922  
## SpeciesEuropean pilchard           -1.277782   2.141420  -0.597   0.5509  
## SpeciesFlathead grey mullet         0.219181   1.705438   0.129   0.8978  
## SpeciesGilthead seabream           -3.435381   2.211457  -1.553   0.1207  
## SpeciesGreenland halibut           -1.580903   1.921165  -0.823   0.4108  
## SpeciesHaddock                      0.542195   1.830017   0.296   0.7671  
## SpeciesHerring                     -2.390768   1.816886  -1.316   0.1885  
## SpeciesLargehead hairtail          -2.587772   1.883819  -1.374   0.1699  
## SpeciesNorthern shrimp             -1.817215   1.975532  -0.920   0.3579  
## SpeciesOctopus                     -1.490679   2.274483  -0.655   0.5124  
## SpeciesPicked dogfish               1.984094   1.924676   1.031   0.3029  
## SpeciesRound sardinella            -0.873922   1.935608  -0.451   0.6517  
## SpeciesRoundnose grenadier          0.779836   2.180094   0.358   0.7206  
## SpeciesSaithe                      -0.235999   1.819645  -0.130   0.8968  
## SpeciesShort-finned squid          -1.350383   2.087658  -0.647   0.5179  
## SpeciesSilver hake                 -1.697932   2.013452  -0.843   0.3993  
## SpeciesSkipjack tuna               -0.667965   2.060299  -0.324   0.7459  
## SpeciesSnow crab                  -10.029496   3.971369  -2.525   0.0117 *
## SpeciesSouth American pilchard      0.255210   1.958471   0.130   0.8963  
## SpeciesSpotted wolffish            -0.116712   2.051217  -0.057   0.9546  
## SpeciesSprat                       -1.705181   1.965622  -0.868   0.3859  
## SpeciesSwordfish                   -0.518426   2.220139  -0.234   0.8154  
## SpeciesThorny skate                -0.898912   1.902462  -0.472   0.6367  
## SpeciesTusk                        -0.861566   1.991195  -0.433   0.6653  
## SpeciesWhite hake                  -0.759902   1.993745  -0.381   0.7032  
## SpeciesWhiting                     -1.155420   1.887447  -0.612   0.5406  
## SpeciesWinter skate                -2.751121   2.200808  -1.250   0.2116  
## SpeciesWitch flounder              -1.338084   1.889450  -0.708   0.4790  
## SpeciesWolffish                     0.295527   1.870301   0.158   0.8745  
## SpeciesYellowfin tuna              -1.393145   1.987708  -0.701   0.4835  
## TL:SpeciesAmerican angler          -0.228535   0.501155  -0.456   0.6485  
## TL:SpeciesAmerican plaice           0.005171   0.458397   0.011   0.9910  
## TL:SpeciesAnchovy                  -0.278649   0.545790  -0.511   0.6098  
## TL:SpeciesAtlantic argentine        0.422207   0.673947   0.626   0.5312  
## TL:SpeciesAtlantic bonito          -0.963525   0.500318  -1.926   0.0544 .
## TL:SpeciesAtlantic cod              0.235803   0.433299   0.544   0.5864  
## TL:SpeciesAtlantic halibut          0.013333   0.472811   0.028   0.9775  
## TL:SpeciesAtlantic horse mackerel  -0.197115   0.489283  -0.403   0.6871  
## TL:SpeciesAtlantic mackerel         0.022954   0.431447   0.053   0.9576  
## TL:SpeciesAtlantic saury            0.769124   0.761520   1.010   0.3128  
## TL:SpeciesBig-eye tuna              0.689915   0.492112   1.402   0.1613  
## TL:SpeciesBluefin tuna              0.293883   0.505653   0.581   0.5612  
## TL:SpeciesBluefish                  0.235789   0.585844   0.402   0.6874  
## TL:SpeciesCapelin                   0.273621   0.617854   0.443   0.6580  
## TL:SpeciesChub mackerel            -0.162760   0.433758  -0.375   0.7076  
## TL:SpeciesCommon sole              -0.327563   0.547115  -0.599   0.5495  
## TL:SpeciesEuropean anchovy         -0.186410   0.495690  -0.376   0.7070  
## TL:SpeciesEuropean hake            -0.274518   0.497087  -0.552   0.5809  
## TL:SpeciesEuropean pilchard        -0.018388   0.587547  -0.031   0.9750  
## TL:SpeciesFlathead grey mullet     -0.498969   0.423079  -1.179   0.2385  
## TL:SpeciesGilthead seabream         0.803510   0.597252   1.345   0.1788  
## TL:SpeciesGreenland halibut         0.199774   0.460323   0.434   0.6644  
## TL:SpeciesHaddock                  -0.408811   0.452436  -0.904   0.3664  
## TL:SpeciesHerring                   0.318680   0.456060   0.699   0.4849  
## TL:SpeciesLargehead hairtail        0.558443   0.454314   1.229   0.2193  
## TL:SpeciesNorthern shrimp           0.206502   0.579062   0.357   0.7215  
## TL:SpeciesOctopus                   0.106505   0.602503   0.177   0.8597  
## TL:SpeciesPicked dogfish           -0.613108   0.467561  -1.311   0.1901  
## TL:SpeciesRound sardinella         -0.099173   0.525266  -0.189   0.8503  
## TL:SpeciesRoundnose grenadier      -0.346806   0.554325  -0.626   0.5317  
## TL:SpeciesSaithe                   -0.125324   0.437059  -0.287   0.7744  
## TL:SpeciesShort-finned squid        0.071192   0.521752   0.136   0.8915  
## TL:SpeciesSilver hake               0.285586   0.489231   0.584   0.5595  
## TL:SpeciesSkipjack tuna             0.141643   0.497286   0.285   0.7758  
## TL:SpeciesSnow crab                 3.161116   1.281970   2.466   0.0138 *
## TL:SpeciesSouth American pilchard  -0.537984   0.547980  -0.982   0.3265  
## TL:SpeciesSpotted wolffish         -0.171816   0.526778  -0.326   0.7444  
## TL:SpeciesSprat                     0.107489   0.517631   0.208   0.8355  
## TL:SpeciesSwordfish                 0.058153   0.529903   0.110   0.9126  
## TL:SpeciesThorny skate              0.141926   0.464312   0.306   0.7599  
## TL:SpeciesTusk                      0.100930   0.478782   0.211   0.8331  
## TL:SpeciesWhite hake                0.080324   0.479984   0.167   0.8671  
## TL:SpeciesWhiting                   0.071481   0.457727   0.156   0.8759  
## TL:SpeciesWinter skate              0.617687   0.546391   1.130   0.2586  
## TL:SpeciesWitch flounder            0.091379   0.483279   0.189   0.8501  
## TL:SpeciesWolffish                 -0.278931   0.467694  -0.596   0.5511  
## TL:SpeciesYellowfin tuna            0.316481   0.476684   0.664   0.5069  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4475 on 948 degrees of freedom
## Multiple R-squared:  0.6797, Adjusted R-squared:  0.6476 
## F-statistic: 21.18 on 95 and 948 DF,  p-value: < 2.2e-16

The results from model 6 (fit6) indicates that the effect of TL on SPPR does not depend on species, meaning that the increase/decrease in TL by 1 unit will lead to the same changes in \(\log_{10}(SPPR)\) for all species.

Make a regression based on average values

chart.Correlation(as.matrix(dt_cor), histogram=TRUE, pch=19)

fit7 <- lm(MeanSPPR~MeanTL+MeanPB+MeanQB, data=dt)

The MeanPB and MeanQB are multicolinearity VIF>4, hence, the predictors with highest VIF (MeanQB) are eliminated the model is then fitted again

fit8 <- lm(MeanSPPR~MeanTL+MeanPB, data=dt)
fit9 <- lm(MeanSPPR~MeanTL, data=dt)
AIC(fit8, fit9)

##      df       AIC
## fit8  4 10.644010
## fit9  3  8.698002

anova(fit9, fit8)

## Analysis of Variance Table
## 
## Model 1: MeanSPPR ~ MeanTL
## Model 2: MeanSPPR ~ MeanTL + MeanPB
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     46 2.9729                           
## 2     45 2.9695  1 0.0033421 0.0506  0.823

AIC of model 8 is lower, and the anova test if non-significant indicate that the MeanPB can be dropt from the model. Hence, the \(\log_{10}(\pi_{SPPR})\) is only dependent on meanTL of the species.

Draw plot for regression of mean(log10(SPPR)) and log10(SPPR) vs TL

p3 <- ggplot(data=df, aes(x=TL, y=SPPR, colour=Species))+
  geom_point()+
  scale_colour_discrete(guide=FALSE)+
  geom_smooth(method="lm", se=FALSE)+
  facet_wrap(~Species, nrow=8)+
  stat_smooth_func(geom="text",method="lm",hjust=0, vjust=-1,parse=TRUE, xpos=2, ypos=3)+
  ylab(expression(log[10](SPPR)))
p3

Figure 3. Correlation of log10(SPPR) vs TL for 48 main species which are most represented in the food web models.

Model diagnostic for \(\log_{10}(\pi_{SPPR})\) vs MeanTL

library(gvlma)
fit <- lm(MeanSPPR~MeanTL, data=dt)
fit1 <- lm(MeanSPPR~MeanTL + MeanPB , data=dt)
AIC(fit, fit1)

##      df       AIC
## fit   3  8.698002
## fit1  4 10.644010

summary(gvlma(fit1))

## 
## Call:
## lm(formula = MeanSPPR ~ MeanTL + MeanPB, data = dt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51678 -0.21645  0.00526  0.16354  0.54309 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.61502    0.43662  -3.699 0.000587 ***
## MeanTL       1.23091    0.10482  11.743 2.68e-15 ***
## MeanPB      -0.02049    0.09106  -0.225 0.822962    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2569 on 45 degrees of freedom
## Multiple R-squared:  0.8409, Adjusted R-squared:  0.8338 
## F-statistic: 118.9 on 2 and 45 DF,  p-value: < 2.2e-16
## 
## 
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance =  0.05 
## 
## Call:
##  gvlma(x = fit1) 
## 
##                     Value  p-value                   Decision
## Global Stat        9.4298 0.051209    Assumptions acceptable.
## Skewness           0.1301 0.718339    Assumptions acceptable.
## Kurtosis           0.7828 0.376272    Assumptions acceptable.
## Link Function      8.0912 0.004448 Assumptions NOT satisfied!
## Heteroscedasticity 0.4257 0.514111    Assumptions acceptable.

par(mfrow=c(2,2))
plot(fit)

Check the assumption of linear regression by formal test

# normal distribution of redisudal using shapiro.test
shapiro.test(fit$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  fit$residuals
## W = 0.98293, p-value = 0.7037

# equal variance by ncvTest
ncvTest(fit)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.1315153    Df = 1     p = 0.7168661

From these test the normality and constant variance assumptions are satisfied.

Comparison with empirical approach

df_fishbase <- read.xlsx(file="fishbase.xlsx", 1)
colnames(df_fishbase)[which(colnames(df_fishbase)=="TL")] <- "MeanTL"
fitted <- predict(fit, newdata=df_fishbase, interval = "prediction")
df_fishbase<-cbind(df_fishbase, fitted)
df_fishbase$empirical.equation <- df_fishbase$MeanTL-1  #Assuming 10% of TE
df_fishbase <- cbind(df_fishbase[,1:3], round(df_fishbase[,4:ncol(df_fishbase)], digits=2))
datatable(df_fishbase)

ggplot(data=df_fishbase, aes(x=MeanTL, y=fit))+
  geom_line(colour="blue")+
  geom_line(aes(y=lwr), linetype="dashed", colour="blue")+
  geom_line(aes(y=upr), linetype="dashed", colour="blue")+
  geom_point(aes(y=empirical.equation, colour="red"))+
  scale_colour_discrete("", labels="SPPR from empirical equation")+
  geom_vline(xintercept = 4.1)

This code is to final analysis after discussion and need to be improved

LDA

Jan, 2016

Make a regression based on average values

Model diagnostic for \(\log_{10}(\pi_{SPPR})\) vs MeanTL

Comparison with empirical approach