R Markdown
### ELEFAN_Perm - A permutation-based significance test for modal progression in LFD distributions
# (Copyright: Ralf Schwamborn, April 2018)
# LFD: "Length-Frequency-Distribution"
# ELEFAN_Perm, version 1.0 (beta)
# Null hypothesis tested: no modes, no modal progression (chaos, white noise)
# Method: random permutation (sampling without replacement)
# First, let's look at two simple "textbook" examples of permutation
##############
#### A. Two Examples (for illustration)
# A.1 Permutation vs. bootstrap resampling
# (example from the base R help page for the sample() function)
x <- 1:12
# a random permutation
sample(x, replace = FALSE)
# bootstrap resampling
sample(x, replace = TRUE)
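# Side note (not in the original example): permutations are random, so for a
# reproducible run one can fix the RNG seed first with base R's set.seed().
set.seed(42) # illustrative seed value
sample(x, replace = FALSE) # now reproducible across runs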
##############
# A.2 Testing the significance of a linear regression slope with a permutation test
# ("textbook" example)
# test statistic: slope
# Null hypothesis: slope = zero (random, white-noise data)
# Question: is the slope significantly (p < 0.05) different from zero?
x <- 1:10
#y0 = 1+2*x
#rand <- rnorm(n = 10, mean = 0, sd = 3)
#y = round(y0 +rand)
#
# y = c(-1 , 8 , 2 , 9, 14, 17, 16, 19, 26,27)
y = c(1, 16, 19, 26, 30)
x = c(1, 2, 3, 4, 5)
#plot with regression line
plot(y~x)
abline(lm(y~x))
coef(lm(y ~ x)) # slope = 6.8 (is this different from zero? Null hypothesis: slope = zero)
slope <- as.numeric(coef(lm(y ~ x))[2]) # slope = 6.8
# test statistic: slope
# permutate the "y" data
# short script to test the null hypothesis of the slope by permutation
mean (y); sd(y)
y.permsam1 <- sample(y, replace = FALSE)
mean(y.permsam1); sd(y.permsam1) # the permuted sample has the same mean and sd as the original data
slope.perm1 <- as.numeric(coef(lm(y.permsam1 ~ x))[2]) # slope of the permuted data
(slope.perm1) # the slope is a different, random number each time
#plot
plot(y.permsam1~x)
abline(lm(y.permsam1~x))
# As loop (permute many times)
NPerm = 5000 # takes a few seconds
outp.list0 = rep.int(NA,NPerm )
for (w in 1:NPerm) {
# x <- 1:10
# y = c(-1 , 8 , 2 , 9, 14, 17, 16, 19, 26,27)
y = c(1, 16, 19, 26, 30)
x = c(1, 2, 3, 4, 5)
y.permsam1 <- sample(y, replace = FALSE)
mean(y.permsam1); sd(y.permsam1) # the permuted sample has the same mean and sd as the original data
slope.perm1 <- as.numeric(coef(lm(y.permsam1 ~ x))[2]) # slope of the permuted data
outp.list0[w] <- slope.perm1
}
# Analyse outputs
# View(outp.list0)
hist(outp.list0, main = "Posterior distrib. for the SLOPE", col = "grey")
abline(v = slope, col = "green")
# 95% confidence interval for the set of permuted slopes (not for the original slope)
quantile(outp.list0, c(0.025, 0.975))
quantiles95 <- quantile(outp.list0, c(0.025, 0.975))
abline(v = quantiles95, col = "red")
# approximate "p" value (becomes more precise mith higher NPerm)
#Nlarger <- length ( outp.list0[outp.list0 >= quantile(outp.list0, 0.975)])
#Nsmaller <- length ( outp.list0[outp.list0 < quantile(outp.list0, 0.975)])
Nlarger <- length ( outp.list0[outp.list0 >= slope])
Nsmaller <- length ( outp.list0[outp.list0 < slope])
p_value <- Nlarger/Nsmaller # small "p" (around 0.008 with these 5 points): the slope is significant
p_value
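# Side note: Nlarger/Nsmaller approximates the usual one-sided permutation
# p-value when "p" is small. A more conventional estimate (a sketch, not the
# formula used above) divides by the total number of permutations, adding 1
# to numerator and denominator so the estimate can never be exactly zero:
p_conventional <- (Nlarger + 1) / (NPerm + 1)
p_conventional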
# compare this with the parametric "p" estimate (OK)
summary(lm (y ~x) )
##############
### Application to LFD analysis
## B. A permutation-based significance test for modal progression in LFD distributions
# test statistic: Phi' (growth performance index)
# Null hypothesis: Phi' of the LFD data = Phi' derived from randomly permuted LFD data
# Question: is Phi' significantly (p < 0.05) different from random?
library(TropFishR)
data(alba) # "alba": a very small and imperfect dataset (7 months only, few size bins, only 675 individuals)
# B.1 plot original LFD data
plot(alba, Fname = "catch", date.axis = "modern")
# B.2 one-time fit of the VBGF curve to the original LFD data
ELEFAN_GA(alba, popSize = 40, maxiter = 12,
MA = 9)
# B.3 Results of 5 fit runs: 5 slightly different values for Phi' (all around 2):
# $par$phiL: 2.030329, 2.009758, 1.983218, 1.978591, 1.965799
Phi.alba <- c(2.030329, 2.009758, 1.983218, 1.978591, 1.965799)
Phi_medvalue <- as.numeric(median(Phi.alba))
Phi_medvalue # 1.98
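# For reference: phiL is the Pauly & Munro growth performance index,
# Phi' = log10(K) + 2 * log10(Linf). A one-line helper (illustrative values,
# not the fitted "alba" parameters):
phi_prime <- function(Linf, K) log10(K) + 2 * log10(Linf)
phi_prime(Linf = 10, K = 1) # = 2, the same order as the median above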
# B.4 permutate the "catch" histograms
alba.perm1 <- alba # permuted version of the "alba" dataset
# B.5 loop that permutes the catch month by month
for (w in 1: ncol(alba.perm1$catch)){
alba.perm1$catch[,w] <- sample(alba$catch[,w], replace = FALSE)
}
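# Sanity check (not in the original script): the month-wise permutation keeps
# each month's total catch; only the assignment of counts to length bins changes.
all(colSums(alba.perm1$catch) == colSums(alba$catch)) # should be TRUE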
# B.6 plot the permuted ("messed up") LFD data
plot(alba.perm1, Fname = "catch", date.axis = "modern")
# B.7 one-time fit of the VBGF curve to the permuted LFD data
res1 <- ELEFAN_GA(alba.perm1, popSize = 40, maxiter = 10,
MA = 9)
res1
# B.8 Results of 5 fit runs on the permuted data: 5 different values for Phi' (all well below 2):
# $par$phiL: 1.042169, 1.044718, 1.237749, 1.202476, 1.746247
Phi.alba.perm1 <- c(1.042169, 1.044718, 1.237749, 1.202476, 1.746247)
Phi_medvalue.perm1 <- round(median(Phi.alba.perm1), 3)
Phi_medvalue.perm1
##############
## C. Full analysis as a loop with many permutations
# C.1. loop that permutes the catch month by month
# As loop (permute many times)
NPerm = 400 # takes a few minutes
Linf <- rep(NA, 1)
K <-rep(NA, 1)
t_anchor <-rep(NA, 1)
Phi_prime <- rep(NA, 1)
Rn_max <- rep(NA, 1)
dat.time <- rep(NA, 1) # current time (in seconds)
outp.table <- data.frame(Linf, K, t_anchor, Phi_prime, Rn_max,
dat.time)
# outp.list = rep.int(NA,NPerm )
for (w in 1:NPerm) {
alba.perm1 <- alba # permuted version of the "alba" dataset
for (i in 1: ncol(alba.perm1$catch)){
alba.perm1$catch[,i] <- sample(alba$catch[,i], replace = FALSE)
}
res1 <- ELEFAN_GA(alba.perm1, popSize = 40, maxiter = 8,
MA = 9, plot.score = FALSE)
print(res1$par$phiL) # progress output (bare expressions are not auto-printed inside a loop)
# outp.list[w] <- res1$par$phiL
outp.table[w,1] <- round(as.numeric(res1$par[1]), 2)
outp.table[w,2] <- round(as.numeric(res1$par[2]), 3)
outp.table[w,3] <- round(as.numeric(res1$par[3]), 3)
outp.table[w,4] <- round(as.numeric(res1$par[4]), 3)
outp.table[w,5] <- round(res1$Rn_max, 4)
outp.table[w,6] <- Sys.time()
}
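# The same month-by-month permutation is repeated verbatim in the sections
# below; a small helper (hypothetical name permute_lfq, not part of TropFishR)
# would avoid the duplication:
permute_lfq <- function(lfq) {
  lfq_perm <- lfq
  for (i in seq_len(ncol(lfq_perm$catch))) {
    lfq_perm$catch[, i] <- sample(lfq$catch[, i], replace = FALSE)
  }
  lfq_perm
}
# usage: alba.perm1 <- permute_lfq(alba)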
outp.table
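# Because column 6 stores a Sys.time() stamp per iteration, the approximate
# duration of each ELEFAN_GA run (in seconds, assuming numeric coercion of the
# timestamps) can be recovered afterwards:
summary(diff(outp.table$dat.time))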
# outp.list
outp.list <- outp.table$Phi_prime # choose data to be analyzed (e.g. Phi')
# C.2 export results
write.table(outp.table, file = "outp_list233alb_B2.csv", sep = ";", row.names = FALSE, col.names = FALSE)
outp.table <- read.table(file = "outp_list233alb_B2.csv", sep = ";", blank.lines.skip = TRUE)
outp.list <- as.numeric(outp.table$V4[1:234]) # column 4 (V4) holds Phi'
# C.3 plot histogram with the 95% confidence interval for the set of permuted Phi' values (red)
# and the original estimate for Phi' (green).
# If the result is significant, the green line should fall outside the red lines.
hist(outp.list, main = "Posterior distrib. for Phi' ")
d <- density(outp.list, adjust=0.9)
plot(d, type = "n", xlim = c(0,max(d$x)))
polygon(d, col = "wheat")
abline(v = Phi_medvalue, col = "blue")
hist(outp.list, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4), prob=TRUE)
a
abline(v = Phi.alba, col = "yellow")
abline(v = Phi_medvalue, col = "green")
# 95% confidence interval for Phi'
quantile(outp.list, c(0.025, 0.975)) # 0.6 to 2.2
# approximate "p" value (becomes more precise mith higher NPerm)
quantiles95 <- quantile(outp.list, c(0.025, 0.975))
abline(v = quantile(outp.list, c(0.95)), col = "red") # upper limit of 95% of the permuted values
quantiles95 # 95% Confid. Interval
text(3.25,0.5, " p=0.53, Nm=7months")
text(3.25,0.3, " NPerm=400")
# Calculate "p" that Phi' is different from (i.e, larger than ) a set of Phi' values generated by a random distribution.
# Analyse posterior distrib. , calculate the relative amount (percentage) of data that are equal or above the observed value (should be less than 5%)
Nlarger <- length ( outp.list[outp.list >= Phi_medvalue])
Nsmaller <- length ( outp.list[outp.list < Phi_medvalue])
p_value <- Nlarger/Nsmaller
p_value # 0.53 - the LFD distributions are NOT significantly different from "white noise"
# Conclusion: Phi' is NOT significantly (p = 0.53) different from Phi' obtained from "white noise", for the "alba" dataset
##############
# C.4 Now for the "synLF5b" subset of the synLFQ5 dataset:
# perfect synthetic data, 12 months (SLOW, low "p" value)
data(synLFQ5)
synLF5b <- synLFQ5 # select the first 12 months from synLFQ5
synLF5b$catch <- synLF5b$catch[,1:12]
synLF5b$dates <- synLF5b$dates[1:12]
sum(synLF5b$catch)
# C.4.a plot original LFD data
plot(synLF5b, Fname = "catch", date.axis = "modern")
# C.4.b one-time fit of the VBGF curve to the original LFD data
ELEFAN_GA(synLF5b, popSize = 40, maxiter = 20,
MA = 9)# takes about 11 seconds
# C.4.c Results of 5 fit runs: 5 slightly different values for Phi' (all around 3.5):
# $par$phiL: ...
Phi.syn5 <- c(3.537974, 3.499549, 3.492911, 3.504784, 3.500939)
Phi_medvalue <- as.numeric(median(Phi.syn5))
Phi_medvalue # ~3.50 (the Phi' originally used to simulate the data was 3.5)
# C.4d As loop (permute many times)
NPerm = 400 # takes a while, approx. 12 secs per run...
Linf <- rep(NA, 1)
K <-rep(NA, 1)
t_anchor <-rep(NA, 1)
Phi_prime <- rep(NA, 1)
Rn_max <- rep(NA, 1)
dat.time <- rep(NA, 1) # current time (in seconds)
outp.table3 <- data.frame(Linf, K, t_anchor, Phi_prime, Rn_max,
dat.time)
# outp.list3 = rep.int(NA,NPerm )
for (w in 1:NPerm) {
synLF5b <- synLFQ5
synLF5b$catch <- synLF5b$catch[,1:12]
synLF5b$dates <- synLF5b$dates[1:12]
synLF5b.perm1 <- synLF5b # permuted version of the "syn5" dataset
for (i in 1: ncol(synLF5b.perm1$catch)){
synLF5b.perm1$catch[,i] <- sample(synLF5b$catch[,i], replace = FALSE)
}
res1 <- ELEFAN_GA(synLF5b.perm1, popSize = 40, maxiter = 8,
MA = 9, plot.score = FALSE)
print(res1$par$phiL) # progress output (bare expressions are not auto-printed inside a loop)
# outp.list[w] <- res1$par$phiL
outp.table3[w,1] <- round(as.numeric(res1$par[1]), 2)
outp.table3[w,2] <- round(as.numeric(res1$par[2]), 3)
outp.table3[w,3] <- round(as.numeric(res1$par[3]), 3)
outp.table3[w,4] <- round(as.numeric(res1$par[4]), 3)
outp.table3[w,5] <- round(res1$Rn_max, 4)
outp.table3[w,6] <- Sys.time()
}
outp.table3
# outp.list3
outp.list3 <- outp.table3$Phi_prime # choose data to be analyzed (e.g. Phi')
outp.list3
# View(outp.list3)
# C.4.e export results
write.table(outp.table3, file = "outp_table3_syn5b_12M_400_Ciii.csv", sep = ";", row.names = FALSE, col.names = FALSE)
write.table(outp.list3, file = "outp.list3_syn5b_12M_400_Ciii.csv", sep = ";", row.names = FALSE, col.names = FALSE)
#outp.list3 <- read.table(file = "outp_list_syn272_Phi_p_onlyCXXII.csv", blank.lines.skip = TRUE)
# outp.list3 <- as.numeric(outp.list3$V1)
# C.4.f plot histogram with the 95% confidence interval for the set of permuted Phi' values (red)
# and the original estimate for Phi' (blue).
# If the result is significant, the blue line should fall outside the red lines.
hist(outp.list3, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4))
abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "blue")
d <- density(outp.list3, adjust=0.9)
plot(d, type = "n", xlim = c(0,max(d$x)))
polygon(d, col = "wheat")
abline(v = Phi_medvalue, col = "blue")
hist(outp.list3, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4), prob=TRUE)
abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "blue")
lines(d, col="blue", lwd=2) # add a density estimate with defaults
#lines(density(outp.list2, adjust=2), lty="dotted", col="darkgreen", lwd=2)
text(1,0.5, " p=0.0025, Nm=12months")
text(1,0.3, " NPerm=400")
# 95% confidence interval for Phi'
quantile(outp.list3, c(0.95)) # 95th percentile of the permuted Phi' values
abline(v = quantile(outp.list3, c(0.95)), col = "red")
# approximate "p" value (becomes more precise mith higher NPerm)
quantiles95 <- quantile(outp.list3, c(0.025, 0.975))
# abline(v = quantiles95, col = "red")
quantiles95 # 95% Confid. Interval
# Calculate "p" that Phi' is different from (i.e, larger than ) a set of Phi' values generated by a random distribution.
# Analyse posterior distrib. , calculate the relative amount (percentage) of data that are equal or above the observed value (should be less than 5%)
Nlarger <- length ( outp.list3[outp.list3 >= Phi_medvalue]) # raw data
Nsmaller <- length ( outp.list3[outp.list3 < Phi_medvalue]) # raw data
p_value <- Nlarger/Nsmaller # raw data "p" , OK!
p_value # 0 - the LFD distributions ARE significantly different from "white noise"
# Conclusion: Phi' is significantly (p < 0.0025, i.e. 0 of 400 permutations) different from Phi'
# obtained from "white noise", for the "synLF5b" dataset
NlargerD <- length(d$y[d$x >= Phi_medvalue]) # interpolated (density) data
NsmallerD <- length(d$y[d$x < Phi_medvalue]) # interpolated (density) data
p_valueD <- NlargerD/NsmallerD # interpolated data "p"
p_valueD #
# Problem: the tails of the density estimate depend on the subjective choice of the smoothing parameter
# Solution: use only the raw null distribution (no interpolation) and MANY permutations (> 400)
# Interesting observation: Phi' fitted to permuted ("white noise") data lies between 2 and 3.5, never near zero!
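# How many permutations are enough? The Monte-Carlo standard error of a
# permutation "p" estimate shrinks with 1/sqrt(NPerm); a rough sketch
# (assuming binomial sampling of exceedances, illustrative values only):
p <- 0.05; NPerm <- 400
sqrt(p * (1 - p) / NPerm) # ~0.011, so p = 0.05 is known only to about +/- 0.01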
##############
# D. Now for the "synLF5c" subset of the synLFQ5 dataset:
# perfect synthetic data, but 8 months only (SLOW, low "p" value)
synLF5c <- synLFQ5 # select 8 months from synLFQ5
synLF5c$catch <- synLF5c$catch[,c(1,2,3,5,7,9,10,12)]
synLF5c$dates <- synLF5c$dates[c(1,2,3,5,7,9,10,12)]
# D.4.a plot original LFD data
plot(synLF5c, Fname = "catch", date.axis = "modern")
# D.4.b one-time fit of the VBGF curve to the original LFD data
ELEFAN_GA(synLF5c, popSize = 40, maxiter = 20,
MA = 9)# takes about 7 seconds
# D.4.c Results of 5 fit runs: 5 slightly different values for Phi' (most around 3.5):
# $par$phiL: ...
Phi.syn5 <- c(3.550812, 3.538186, 3.531929, 3.208253, 3.504487)
Phi_medvalue <- as.numeric(median(Phi.syn5))
Phi_medvalue # 3.53 (the Phi' originally used to simulate the data was 3.5)
# D.4d As loop (permute many times)
NPerm = 300 # takes a while, approx. 8 secs per run...
Linf <- rep(NA, 1)
K <-rep(NA, 1)
t_anchor <-rep(NA, 1)
Phi_prime <- rep(NA, 1)
Rn_max <- rep(NA, 1)
dat.time <- rep(NA, 1) # current time (in seconds)
outp.table4 <- data.frame(Linf, K, t_anchor, Phi_prime, Rn_max,
dat.time)
# outp.list4 = rep.int(NA,NPerm )
for (w in 1:NPerm) {
synLF5c <- synLFQ5
synLF5c$catch <- synLF5c$catch[,c(1,2,3,5,7,9,10,12)]
synLF5c$dates <- synLF5c$dates[c(1,2,3,5,7,9,10,12)]
synLF5c.perm1 <- synLF5c # permuted version of the "syn5" dataset
for (i in 1: ncol(synLF5c.perm1$catch)){
synLF5c.perm1$catch[,i] <- sample(synLF5c$catch[,i], replace = FALSE)
}
res1 <- ELEFAN_GA(synLF5c.perm1, popSize = 40, maxiter = 8,
MA = 9, plot.score = FALSE)
print(res1$par$phiL) # progress output (bare expressions are not auto-printed inside a loop)
# outp.list[w] <- res1$par$phiL
outp.table4[w,1] <- round(as.numeric(res1$par[1]), 2)
outp.table4[w,2] <- round(as.numeric(res1$par[2]), 3)
outp.table4[w,3] <- round(as.numeric(res1$par[3]), 3)
outp.table4[w,4] <- round(as.numeric(res1$par[4]), 3)
outp.table4[w,5] <- round(res1$Rn_max, 4)
outp.table4[w,6] <- Sys.time()
}
outp.table4
# outp.list4
outp.list4 <- outp.table4$Phi_prime # choose data to be analyzed (e.g. Phi')
outp.list4
summary(outp.list4)
# D.5 export results
write.table(outp.table4, file = "outp_table4cc_syn5c300_CiV.csv", sep = ";", row.names = FALSE, col.names = FALSE)
write.table(outp.list4, file = "outp_list4cc_syn5c300_CiV.csv", sep = ";", row.names = FALSE, col.names = FALSE)
outp.list4.o <- read.table(file = "outp_list4cc_syn5c300_CiV.csv", sep = ";", blank.lines.skip = TRUE)
# outp.list4 <- as.numeric(outp.list4$V1[1:234])
# D.6 plot histogram with the 95% confidence interval for the set of permuted Phi' values (red)
# and the original estimate for Phi' (blue).
# If the result is significant, the blue line should fall outside the red lines.
hist(outp.list4, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4))
abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "blue")
d <- density(outp.list4, adjust=0.9)
plot(d, type = "n", xlim = c(0,max(d$x)))
polygon(d, col = "wheat")
abline(v = Phi_medvalue, col = "blue")
hist(outp.list4, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4), prob=TRUE)
abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "blue")
lines(d, col="blue", lwd=2) # add a density estimate with defaults
#lines(density(outp.list2, adjust=2), lty="dotted", col="darkgreen", lwd=2)
# 95% confidence interval for Phi'
quantile(outp.list4, c(0.025, 0.975)) # 95% interval of the permuted Phi' values
# approximate "p" value (becomes more precise mith higher NPerm)
quantiles95 <- quantile(outp.list4, c(0.025, 0.975))
abline(v = quantiles95, col = "red")
quantiles95 # 95% Confid. Interval
# D.8 Calculate "p" that Phi' is different from (i.e, larger than ) a set of Phi' values generated by a random distribution.
# Analyse posterior distrib. , calculate the relative amount (percentage) of data that are equal or above the observed value (should be less than 5%)
Nlarger <- length ( outp.list4[outp.list4 >= Phi_medvalue]) # raw data
Nsmaller <- length ( outp.list4[outp.list4 < Phi_medvalue]) # raw data
p_value <- Nlarger/Nsmaller # raw data "p"
p_value # 0.003 - the LFD distributions ARE significantly different from "white noise"
# Conclusion: Phi' is significantly (p < 0.01) different from Phi' obtained from "white noise"
# for the "synLF5c" dataset
NlargerD <- length(d$y[d$x >= Phi_medvalue]) # interpolated (density) data
NsmallerD <- length(d$y[d$x < Phi_medvalue]) # interpolated (density) data
p_valueD <- NlargerD/NsmallerD # interpolated data "p"
p_valueD # 0 - also significant according to the interpolated density
# Problem: the tails of the density estimate depend on the subjective choice of the smoothing parameter
# Solution: use only the raw null distribution (no interpolation) and MANY permutations (> 400)
##############
# E. Now for the "synLF5d" subset of the synLFQ5 dataset:
# perfect synthetic data, but 4 months only (SLOW, low "p" value)
synLF5d <- synLFQ5 # select 4 months from synLFQ5
synLF5d$catch <- synLF5d$catch[,c(1,3,7,12)]
synLF5d$dates <- synLF5d$dates[c(1,3, 7, 12)]
# E.a plot original LFD data
plot(synLF5d, Fname = "catch", date.axis = "modern")
# E.b one-time fit of the VBGF curve to the original LFD data
ELEFAN_GA(synLF5d, popSize = 40, maxiter = 20,
MA = 9)# takes about 5 seconds
# E.c Results of 5 fit runs: 5 slightly different values for Phi' (most around 3.5):
# $par$phiL: ...
Phi.syn5 <- c(2.951843, 3.224271, 3.530952, 3.534409, 3.50088)
Phi_medvalue <- as.numeric(median(Phi.syn5))
Phi_medvalue # 3.50088 (the Phi' originally used to simulate the data was 3.5)
# E.d As loop (permute many times)
NPerm = 400 # takes a while, approx. 6 secs per run...
Linf <- rep(NA, 1)
K <-rep(NA, 1)
t_anchor <-rep(NA, 1)
Phi_prime <- rep(NA, 1)
Rn_max <- rep(NA, 1)
dat.time <- rep(NA, 1) # current time (in seconds)
outp.table4d <- data.frame(Linf, K, t_anchor, Phi_prime, Rn_max,
dat.time)
# outp.list4d = rep.int(NA,NPerm )
for (w in 1:NPerm) {
synLF5d <- synLFQ5 # select 4 months from synLFQ5
synLF5d$catch <- synLF5d$catch[,c(1,3,7,12)]
synLF5d$dates <- synLF5d$dates[c(1,3,7,12)]
synLF5d.perm1 <- synLF5d # permuted version of the "syn5" dataset
for (i in 1: ncol(synLF5d.perm1$catch)){
synLF5d.perm1$catch[,i] <- sample(synLF5d$catch[,i], replace = FALSE)
}
res1 <- ELEFAN_GA(synLF5d.perm1, popSize = 40, maxiter = 8,
MA = 9, plot.score = FALSE)
print(res1$par$phiL) # progress output (bare expressions are not auto-printed inside a loop)
# outp.list[w] <- res1$par$phiL
outp.table4d[w,1] <- round(as.numeric(res1$par[1]), 2)
outp.table4d[w,2] <- round(as.numeric(res1$par[2]), 3)
outp.table4d[w,3] <- round(as.numeric(res1$par[3]), 3)
outp.table4d[w,4] <- round(as.numeric(res1$par[4]), 3)
outp.table4d[w,5] <- round(res1$Rn_max, 4)
outp.table4d[w,6] <- Sys.time()
}
outp.table4d
# outp.list4d
outp.list4d <- outp.table4d$Phi_prime # choose data to be analyzed (e.g. Phi')
outp.list4d
summary(outp.list4d)
# E.e export results
write.table(outp.table4d, file = "outp_table4d_syn5d400_CiV.csv", sep = ";", row.names = FALSE, col.names = FALSE)
write.table(outp.list4d, file = "outp_list4d_syn5d400_CiV.csv", sep = ";", row.names = FALSE, col.names = FALSE)
outp.list4d.o <- read.table(file = "outp_list4d_syn5d400_CiV.csv", sep = ";", blank.lines.skip = TRUE)
# outp.list4d <- as.numeric(outp.list4d$V1[1:234])
# E.f plot histogram with the 95% confidence interval for the set of permuted Phi' values (red)
# and the original estimate for Phi' (blue).
# If the result is significant, the blue line should fall outside the red lines.
hist(outp.list4d, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4))
abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "blue")
d <- density(outp.list4d, adjust=0.9)
plot(d, type = "n", xlim = c(0,max(d$x)))
polygon(d, col = "wheat")
abline(v = Phi_medvalue, col = "blue")
hist(outp.list4d, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4), prob=TRUE)
abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "blue")
lines(d, col="blue", lwd=2) # add a density estimate with defaults
#lines(density(outp.list2, adjust=2), lty="dotted", col="darkgreen", lwd=2)
# 95% confidence interval for Phi'
quantile(outp.list4d, c(0.025, 0.975)) # 95% interval of the permuted Phi' values
# approximate "p" value (becomes more precise mith higher NPerm)
quantiles95 <- quantile(outp.list4d, c(0.025, 0.975))
abline(v = quantiles95, col = "red")
quantiles95 # 95% Confid. Interval
# E.g Calculate "p" that Phi' is different from (i.e, larger than ) a set of Phi' values generated by a random distribution.
# Analyse posterior distrib. , calculate the relative amount (percentage) of data that are equal or above the observed value (should be less than 5%)
Nlarger <- length ( outp.list4d[outp.list4d >= Phi_medvalue]) # raw data
Nsmaller <- length ( outp.list4d[outp.list4d < Phi_medvalue]) # raw data
p_value <- Nlarger/Nsmaller # raw data "p"
p_value # 0.028 - the LFD distributions ARE significantly different from "white noise"
# Conclusion: Phi' is significantly (p < 0.05) different from Phi' obtained from "white noise"
# for the "synLF5d" dataset
NlargerD <- length(d$y[d$x >= Phi_medvalue]) # interpolated (density) data
NsmallerD <- length(d$y[d$x < Phi_medvalue]) # interpolated (density) data
p_valueD <- NlargerD/NsmallerD # interpolated data "p"
p_valueD # 0.24 - NOT significant according to the interpolated density (contradicting the raw "p")
# Problem: the tails of the density estimate depend on the subjective choice of the smoothing parameter
# Solution: use only the raw null distribution (no interpolation) and MANY permutations (> 400)
##############
# F. Now for the "synLF5e" subset of the synLFQ5 dataset:
# perfect synthetic data, but 3 months only (FAST, high "p" value)
library(TropFishR)
library(parallel)
synLF5e <- synLFQ5 # select 3 months from synLFQ5
synLF5e$catch <- synLF5e$catch[,c(1,3,12)]
synLF5e$dates <- synLF5e$dates[c(1,3, 12)]
sum(synLF5e$catch) # 11,815 individuals
# F.a plot original LFD data
plot(synLF5e, Fname = "catch", date.axis = "modern")
# F.b one-time fit of the VBGF curve to the original LFD data
ELEFAN_GA(synLF5e, popSize = 40, maxiter = 20,
MA = 9)# takes about 5-6 seconds
# F.c Results of 6 fit runs: 6 slightly different values for Phi' (most around 3.2-3.5):
# $par$phiL: ...
Phi.syn5 <- c(3.236708, 3.239536, 3.514382, 3.262346, 3.250434, 3.191535)
Phi_medvalue <- as.numeric(median(Phi.syn5))
Phi_medvalue # 3.24 (the Phi' originally used to simulate the data was 3.5)
# F.d As loop (permute many times)
NPerm = 400 # takes a while, approx. 6 secs per run...
Linf <- rep(NA, 1)
K <-rep(NA, 1)
t_anchor <-rep(NA, 1)
Phi_prime <- rep(NA, 1)
Rn_max <- rep(NA, 1)
dat.time <- rep(NA, 1) # current time (in seconds)
outp.table5d <- data.frame(Linf, K, t_anchor, Phi_prime, Rn_max,
dat.time)
# outp.list4d = rep.int(NA,NPerm )
for (w in 1:NPerm) {
synLF5e <- synLFQ5 # select 3 months from synLFQ5
synLF5e$catch <- synLF5e$catch[,c(1,3,12)]
synLF5e$dates <- synLF5e$dates[c(1,3, 12)]
synLF5e.perm1 <- synLF5e # permuted version of the "syn5" dataset
for (i in 1: ncol(synLF5e.perm1$catch)){
synLF5e.perm1$catch[,i] <- sample(synLF5e$catch[,i], replace = FALSE)
}
res1 <- ELEFAN_GA(synLF5e.perm1, popSize = 40, maxiter = 8,
MA = 9, plot.score = FALSE)
print(res1$par$phiL) # progress output (bare expressions are not auto-printed inside a loop)
# outp.list[w] <- res1$par$phiL
outp.table5d[w,1] <- round(as.numeric(res1$par[1]), 2)
outp.table5d[w,2] <- round(as.numeric(res1$par[2]), 3)
outp.table5d[w,3] <- round(as.numeric(res1$par[3]), 3)
outp.table5d[w,4] <- round(as.numeric(res1$par[4]), 3)
outp.table5d[w,5] <- round(res1$Rn_max, 4)
outp.table5d[w,6] <- Sys.time()
}
outp.table5d
outp.list5d <- outp.table5d$Phi_prime # choose data to be analyzed (e.g. Phi')
#outp.list5d <- outp.table5d # choose data to be analyzed (e.g. Phi')
# outp.list5d <- as.numeric(outp.table5d$V1)
outp.list5d
summary(outp.list5d)
# F.e export results
write.table(outp.table5d, file = "outp_table5d_syn5d_400_3months_CVii.csv", sep = ";", row.names = FALSE, col.names = FALSE)
write.table(outp.list5d, file = "outp_listd_syn5d_400_3monthsdCVii.csv", sep = ";", row.names = FALSE, col.names = FALSE)
outp.list5d <- read.table(file = "outp_listd_syn5d_400_3monthsdCVii.csv", sep = ";", blank.lines.skip = TRUE)
outp.list5d <- as.numeric(outp.list5d$V1)
# F.f plot histogram with the 95% confidence interval for the set of permuted Phi' values (red)
# and the original estimate for Phi' (green).
# If the result is significant, the green line should fall outside the red lines.
hist(outp.list5d, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4),
xlab = "Phi'")
abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "blue")
d <- density(outp.list5d, adjust=0.9)
plot(d, type = "n", xlim = c(0,max(d$x)))
polygon(d, col = "wheat")
abline(v = Phi_medvalue, col = "blue")
hist(outp.list5d, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4), prob=TRUE,
xlab = "Phi'")
# abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "green")
lines(d, col="blue", lwd=2) # add a density estimate with defaults
#lines(density(outp.list2, adjust=2), lty="dotted", col="darkgreen", lwd=2)
# 95% confidence interval for Phi'
quantile(outp.list5d, c(0.025, 0.975)) # 95% interval of the permuted Phi' values
abline(v = quantile(outp.list5d, c(0.95)), col = "red") # red line: 95th percentile of the permuted values
# approximate "p" value (becomes more precise mith higher NPerm)
quantiles95 <- quantile(outp.list5d, c(0.025, 0.975))
# abline(v = quantiles95, col = "red")
quantiles95 # 95% Confid. Interval
# F.g Calculate "p" that Phi' is different from (i.e, larger than ) a set of Phi' values generated by a random distribution.
# Analyse posterior distrib. , calculate the relative amount (percentage) of data that are equal or above the observed value (should be less than 5%)
Nlarger <- length ( outp.list5d[outp.list5d >= Phi_medvalue]) # raw data
Nsmaller <- length ( outp.list5d[outp.list5d < Phi_medvalue]) # raw data
p_value <- Nlarger/Nsmaller # raw data "p"
p_value # 0.2 - the LFD distributions are NOT significantly different from "white noise"
text(1,0.5, " p=0.205, Nm=3months")
text(1,0.3, " NPerm=400")
# Conclusion: Phi' is NOT significantly (p = 0.2) different from Phi' obtained from "white noise"
# for the "synLF5e" dataset
NlargerD <- length(d$y[d$x >= Phi_medvalue]) # interpolated (density) data
NsmallerD <- length(d$y[d$x < Phi_medvalue]) # interpolated (density) data
p_valueD <- NlargerD/NsmallerD # interpolated data "p"
p_valueD # 0.51 - NOT significant according to the interpolated density either
# IMPORTANT: use only the raw null distribution (no interpolation) and MANY permutations (> 400).
# Do not use kernel density smoothing to compute "p": the tails of the density estimate depend on the subjective choice of the smoothing parameter.
# Use kernel density smoothing for graphical purposes only.