This code performs the final analysis after discussion and still needs to be improved.

library(devtools)
# Source ggplot_smooth_func.R from the gist (presumably the provider of
# stat_smooth_func(), which is used for the plots further down).
# The sha1 pins the download to the hash reported when this gist was last
# sourced, so the script stops instead of running remote code that has changed.
devtools::source_gist("524eade46135f6348140", filename = "ggplot_smooth_func.R",
                      sha1 = "c0b163b9fd2d7fe7bd5541e3266d8d36ff3b895d")

## Sourcing https://gist.githubusercontent.com/kdauria/524eade46135f6348140/raw/676acaca9a0a144ef320ae2ef00a31c3daa7179d/ggplot_smooth_func.R

## SHA-1 hash of file is c0b163b9fd2d7fe7bd5541e3266d8d36ff3b895d

Load the data

# Frequency of each Scientific.name in df_final (frequency is in column `freq`).
count <- plyr::count(df_final, "Scientific.name")
# Keep the species that are represented at least 30 times.
count <- count[which(count$freq >= 30), ]
# Drop rows containing NA (e.g. records without a species name).
count <- na.omit(count)
# Records of df_final that belong to the retained species.
df <- df_final[which(df_final$Scientific.name %in% count$Scientific.name), ]
# Variables for the correlation chart; the columns are looked up in `df`
# itself, the data frame that is actually being indexed.
df_cor <- df[, which(colnames(df) %in% c("SPPR", "TL", "TE", "PB", "PQ", "QB"))]

Table 1. The 8 species which are represented at least 30 times, either as a single-species compartment or as part of a multi-species compartment

# Display the species/frequency table; rows containing NA are dropped first.
datatable(na.omit(count))

We can also work only with species which are represented in the food web models as single-species compartments, and investigate the species which are represented at least 15 times

# Restrict to records whose Type is "S" (single-species compartments,
# according to the surrounding text).
df_final_S <- df_final[which(df_final$Type == "S"), ]
# Frequency of each Scientific.name among those records.
count_S <- plyr::count(df_final_S, "Scientific.name")
# Keep the species that are represented at least 15 times.
count_S <- count_S[which(count_S$freq >= 15), ]
# Drop rows containing NA, as is done for `count`: NA %in% NA is TRUE, so a
# retained NA species name would pull the unnamed records into df_S.
count_S <- na.omit(count_S)
# Records that belong to the retained species.
df_S <- df_final_S[which(df_final_S$Scientific.name %in% count_S$Scientific.name), ]
# Variables for the correlation chart; the columns are looked up in `df_S`
# itself, the data frame that is actually being indexed.
df_S_cor <- df_S[, which(colnames(df_S) %in% c("SPPR", "TL", "TE", "PB", "PQ", "QB"))]

Table 2. Data for the 5 species which are represented at least 15 times in the food web models as single-species compartments

# Display the single-species frequency table; rows containing NA are dropped first.
datatable(na.omit(count_S))

Draw the correlations of log10(SPPR) vs. all possible predictors.

# Correlation chart of the columns of df_cor, with histograms enabled and
# solid points (pch = 19).
# NOTE(review): chart.Correlation is presumably from PerformanceAnalytics —
# confirm that the package is attached before this chunk runs.
chart.Correlation(as.matrix(df_cor), histogram=TRUE, pch=19)

Figure 1. Correlations of \(\log_{10}(\text{SPPR})\) vs. predictors for the 8 species which are represented at least 30 times in the food web models.

# Correlation chart of the columns of df_S_cor (single-species subset), with
# histograms enabled and solid points (pch = 19).
# NOTE(review): chart.Correlation is presumably from PerformanceAnalytics —
# confirm that the package is attached before this chunk runs.
chart.Correlation(as.matrix(df_S_cor), histogram=TRUE, pch=19)

Figure 2. Correlations of \(\log_{10}(\text{SPPR})\) vs. predictors for the 5 species which are represented at least 15 times as single-species compartments in the food web models.

Create summary data for all species which occur at least 30 times

# Keep only the identifier, grouping and response/predictor columns of df.
# NOTE(review): an earlier step filters on df_final$Type, whereas "TYPE" is
# requested here — confirm that df really has a column named TYPE.
df_summary1 <- df[, colnames(df) %in% c("ID", "Model", "Ecosystem.type", "Group",
                                        "SPPR", "TYPE", "QB", "PQ", "TE", "TL",
                                        "Scientific.name")]
by_name <- group_by(df_summary1, Scientific.name)
by_TYPE <- group_by(df_summary1, TYPE)

# Per-species record count and min / mean / median / max of TL (NAs ignored).
TL <- as.data.frame(
  dplyr::summarise(
    by_name,
    n = n(),
    MinTL = min(TL, na.rm = TRUE),
    MeanTL = mean(TL, na.rm = TRUE),
    MedianTL = median(TL, na.rm = TRUE),
    MaxTL = max(TL, na.rm = TRUE)
  )
)

# Per-species record count and min / mean / median / max of SPPR (NAs ignored).
SPPR <- as.data.frame(
  dplyr::summarise(
    by_name,
    n = n(),
    MinSPPR = min(SPPR, na.rm = TRUE),
    MeanSPPR = mean(SPPR, na.rm = TRUE),
    MedianSPPR = median(SPPR, na.rm = TRUE),
    MaxSPPR = max(SPPR, na.rm = TRUE)
  )
)

# One row per species: the TL summary followed by the four SPPR statistics
# (species name and n are already supplied by the TL summary).
dt <- cbind(TL, SPPR[, c("MinSPPR", "MeanSPPR", "MedianSPPR", "MaxSPPR")])

Create summary data for all single-species compartments which occur at least 15 times

# Keep only the identifier, grouping and response/predictor columns of df_S.
# NOTE(review): an earlier step filters on df_final$Type, whereas "TYPE" is
# requested here — confirm that df_S really has a column named TYPE.
df_summary2 <- df_S[, colnames(df_S) %in% c("ID", "Model", "Ecosystem.type",
                                            "Group", "SPPR", "TYPE", "QB", "PQ",
                                            "TE", "TL", "Scientific.name")]
by_name_S <- group_by(df_summary2, Scientific.name)
by_TYPE_S <- group_by(df_summary2, TYPE)

# Per-species record count and min / mean / median / max of TL (NAs ignored).
TL_S <- as.data.frame(
  dplyr::summarise(
    by_name_S,
    n = n(),
    MinTL = min(TL, na.rm = TRUE),
    MeanTL = mean(TL, na.rm = TRUE),
    MedianTL = median(TL, na.rm = TRUE),
    MaxTL = max(TL, na.rm = TRUE)
  )
)

# Per-species record count and min / mean / median / max of SPPR (NAs ignored).
SPPR_S <- as.data.frame(
  dplyr::summarise(
    by_name_S,
    n = n(),
    MinSPPR = min(SPPR, na.rm = TRUE),
    MeanSPPR = mean(SPPR, na.rm = TRUE),
    MedianSPPR = median(SPPR, na.rm = TRUE),
    MaxSPPR = max(SPPR, na.rm = TRUE)
  )
)

# One row per species: the TL summary followed by the four SPPR statistics
# (species name and n are already supplied by the TL summary).
dt_S <- cbind(TL_S, SPPR_S[, c("MinSPPR", "MeanSPPR", "MedianSPPR", "MaxSPPR")])

Draw plots for the regressions of log10(SPPR) and mean(log10(SPPR)) vs. TL

# Print plot p1.
# NOTE(review): p1 is not defined in the visible code — presumably it is built
# in a hidden chunk, analogous to p3 but from df; confirm.
p1

Figure 3. Relationship between \(\log_{10}(\text{SPPR})\) and TL

# Print plot p2.
# NOTE(review): p2 is not defined in the visible code — presumably it is built
# in a hidden chunk, analogous to p4 but from dt; confirm.
p2

Figure 4. Relationship between \(\text{mean}\,\log_{10}(\text{SPPR})\) and TL

When considering only species which are represented as single-species compartments

# Scatter of SPPR against TL for the single-species records, one facet per
# species (two facet rows), each with its own linear fit and no confidence band.
p3 <- ggplot(data = df_S, aes(x = TL, y = SPPR, colour = Scientific.name)) +
  geom_point() +
  # The facets already identify the species, so the colour legend is hidden.
  # guide = FALSE is deprecated in ggplot2; "none" is the supported spelling.
  scale_colour_discrete(guide = "none") +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~Scientific.name, nrow = 2) +
  # stat_smooth_func() comes from the sourced gist; presumably it prints the
  # fitted equation as a text label — confirm against the gist.
  stat_smooth_func(geom = "text", method = "lm", hjust = 0, vjust = -1, parse = TRUE)


# Species-level plot: mean SPPR against mean TL from dt_S, points coloured by
# species, with a single linear fit (no confidence band) across all species.
p4 <- ggplot(dt_S, aes(MeanTL, MeanSPPR)) +
  geom_point(aes(colour = Scientific.name)) +
  geom_smooth(method = "lm", se = FALSE) +
  # stat_smooth_func() comes from the sourced gist; presumably it prints the
  # fitted equation as a text label — confirm against the gist.
  stat_smooth_func(
    geom = "text",
    method = "lm",
    hjust = 0,
    vjust = -1,
    parse = TRUE
  )

# Drop the rows of dx that hold the two largest TL values.
# tail(sort(...), 2) ignores NA entries, and the logical filter keeps dx intact
# when nothing matches (negative indexing with an empty which() would instead
# drop every row).
# NOTE(review): dx is not defined in the visible code — confirm where it is
# created and whether this trimmed data is still used.
dx <- dx[!(dx$TL %in% tail(sort(dx$TL), 2)), ]

# Print p3, the per-species regression plot for single-species compartments.
p3

Figure 5. Relationship between \(\log_{10}(\text{SPPR})\) and TL when considering only single-species compartments

# Print p4, the regression of per-species mean SPPR on mean TL.
p4

Figure 6. Relationship between \(\text{mean}\,\log_{10}(\text{SPPR})\) and TL when considering only single-species compartments

In these cases, TL alone cannot explain the changes in \(\log_{10}(\text{SPPR})\).

If I understand correctly, linear regression allows one to predict the mean of the response variable from the predictors, but not the response variable itself. That is why I do not know how we should interpret Figure 4 and Figure 6.

This code performs the final analysis after discussion and still needs to be improved.

LDA

Jan, 2016