library(devtools)
devtools::source_gist("524eade46135f6348140", filename = "ggplot_smooth_func.R")
## Sourcing https://gist.githubusercontent.com/kdauria/524eade46135f6348140/raw/676acaca9a0a144ef320ae2ef00a31c3daa7179d/ggplot_smooth_func.R
## SHA-1 hash of file is c0b163b9fd2d7fe7bd5541e3266d8d36ff3b895d

Load the data

count <- plyr::count(df_final,"Scientific.name")
count$Scientific.name <- as.character(count$Scientific.name)
count <- count[which(count$freq>=15),]             # get species which are presented in more than 15 times
count <- na.omit(count)
df <- df_final[which(df_final$Scientific.name %in% c(count$Scientific.name, "Engraulis ringens")),]
df_cor <- df[,which(colnames(df_final) %in% c("SPPR", "TL", "TE", "PB", "PQ", "QB", "FCI", "NPP", "TST"))]
chart.Correlation(as.matrix(df_cor), histogram=TRUE, pch=19) 

Table 1. 8 species which are represented at least 30 times as single species compartment or part of groups of species compartment

datatable(na.omit(count))

Create summary data for all species which occur more than 15 times

chart.Correlation(as.matrix(dt_cor), histogram=TRUE, pch=19) 

Draw plot for regression of mean(log10(SPPR)) and log10(SPPR) vs TL

vplayout <- function(x, y)
  viewport(layout.pos.row = x, layout.pos.col = y)
pushViewport(viewport(layout=grid.layout(2,1)))

print(p1+theme(plot.margin=unit(c(1,1,1,1), "mm")), vp=vplayout(1,1))
print(p2+theme(plot.margin=unit(c(1,1,1,1), "mm")), vp=vplayout(2,1))

library(gvlma)
fit <- lm(MeanSPPR~MeanTL, data=dt)
fit1 <- lm(MeanSPPR~MeanTL+MeanPB, data=dt)
summary(gvlma(fit))
## 
## Call:
## lm(formula = MeanSPPR ~ MeanTL, data = dt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51847 -0.22738  0.00819  0.15999  0.55475 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.68761    0.29121  -5.795 5.86e-07 ***
## MeanTL       1.24594    0.07996  15.581  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2542 on 46 degrees of freedom
## Multiple R-squared:  0.8407, Adjusted R-squared:  0.8372 
## F-statistic: 242.8 on 1 and 46 DF,  p-value: < 2.2e-16
## 
## 
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance =  0.05 
## 
## Call:
##  gvlma(x = fit) 
## 
##                     Value  p-value                   Decision
## Global Stat        8.3925 0.078213    Assumptions acceptable.
## Skewness           0.0943 0.758780    Assumptions acceptable.
## Kurtosis           0.8069 0.369025    Assumptions acceptable.
## Link Function      6.9524 0.008371 Assumptions NOT satisfied!
## Heteroscedasticity 0.5389 0.462887    Assumptions acceptable.
par(mfrow=c(2,2))
plot(fit)

Check the assumption of linear regression by formal test

# normal distribution of redisudal using shapiro.test
shapiro.test(fit$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  fit$residuals
## W = 0.98293, p-value = 0.7037
# equal variance by ncvTest
ncvTest(fit)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.1315153    Df = 1     p = 0.7168661

From these test the normality and constant variance assumptions are satisfied.

Figure 3. Relation ship between \(\log_{10}\text(SPPR)\) and TL

p2

Figure 4. Relation ship between \(\text(mean)\log_{10}\text(SPPR)\) and TL