library(devtools)
devtools::source_gist("524eade46135f6348140", filename = "ggplot_smooth_func.R")## Sourcing https://gist.githubusercontent.com/kdauria/524eade46135f6348140/raw/676acaca9a0a144ef320ae2ef00a31c3daa7179d/ggplot_smooth_func.R
## SHA-1 hash of file is c0b163b9fd2d7fe7bd5541e3266d8d36ff3b895d
Load the data
count <- plyr::count(df_final,"Scientific.name")
count$Scientific.name <- as.character(count$Scientific.name)
count <- count[which(count$freq>=15),] # get species which are presented in more than 15 times
count <- na.omit(count)
df <- df_final[which(df_final$Scientific.name %in% c(count$Scientific.name, "Engraulis ringens")),]
df_cor <- df[,which(colnames(df_final) %in% c("SPPR", "TL", "TE", "PB", "PQ", "QB", "FCI", "NPP", "TST"))]chart.Correlation(as.matrix(df_cor), histogram=TRUE, pch=19) Table 1. 8 species which are represented at least 30 times as single species compartment or part of groups of species compartment
datatable(na.omit(count))Create summary data for all species which occur more than 15 times
chart.Correlation(as.matrix(dt_cor), histogram=TRUE, pch=19) Draw plot for regression of mean(log10(SPPR)) and log10(SPPR) vs TL
vplayout <- function(x, y)
viewport(layout.pos.row = x, layout.pos.col = y)
pushViewport(viewport(layout=grid.layout(2,1)))
print(p1+theme(plot.margin=unit(c(1,1,1,1), "mm")), vp=vplayout(1,1))
print(p2+theme(plot.margin=unit(c(1,1,1,1), "mm")), vp=vplayout(2,1))library(gvlma)
fit <- lm(MeanSPPR~MeanTL, data=dt)
fit1 <- lm(MeanSPPR~MeanTL+MeanPB, data=dt)
summary(gvlma(fit))##
## Call:
## lm(formula = MeanSPPR ~ MeanTL, data = dt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51847 -0.22738 0.00819 0.15999 0.55475
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.68761 0.29121 -5.795 5.86e-07 ***
## MeanTL 1.24594 0.07996 15.581 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2542 on 46 degrees of freedom
## Multiple R-squared: 0.8407, Adjusted R-squared: 0.8372
## F-statistic: 242.8 on 1 and 46 DF, p-value: < 2.2e-16
##
##
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance = 0.05
##
## Call:
## gvlma(x = fit)
##
## Value p-value Decision
## Global Stat 8.3925 0.078213 Assumptions acceptable.
## Skewness 0.0943 0.758780 Assumptions acceptable.
## Kurtosis 0.8069 0.369025 Assumptions acceptable.
## Link Function 6.9524 0.008371 Assumptions NOT satisfied!
## Heteroscedasticity 0.5389 0.462887 Assumptions acceptable.
par(mfrow=c(2,2))
plot(fit)Check the assumption of linear regression by formal test
# normal distribution of redisudal using shapiro.test
shapiro.test(fit$residuals)##
## Shapiro-Wilk normality test
##
## data: fit$residuals
## W = 0.98293, p-value = 0.7037
# equal variance by ncvTest
ncvTest(fit)## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.1315153 Df = 1 p = 0.7168661
From these test the normality and constant variance assumptions are satisfied.
Figure 3. Relation ship between \(\log_{10}\text(SPPR)\) and TL
p2Figure 4. Relation ship between \(\text(mean)\log_{10}\text(SPPR)\) and TL