R Markdown
### ELEFAN_Perm - A permutation-based significance test for modal progression in LFD distributions
# (Copyright: Ralf Schwamborn, April 2018)
# LFD: "Length-Frequency-Distribution"
# ELEFAN_Perm, version 1.0 (beta)
# Null hypothesis tested: no modes, no modal progression (chaos, white noise)
# Method: random permutation (sampling without replacement)
# First, let's look at two simple "textbook" examples of permutation
##############
#### A. Two Examples (for illustration)
# A.1 Permutation vs. bootstrap resampling
# (example from the base R help page for the sample() function)
x <- 1:12
# a random permutation
sample(x, replace = FALSE)
# bootstrap resampling
sample(x, replace = TRUE)
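# Side note (not in the original example): permutations are random, so for a
# reproducible run one can fix the RNG seed first with base R's set.seed().
set.seed(42) # illustrative seed value
sample(x, replace = FALSE) # now reproducible across runs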
##############
# A.2 Testing the significance of a linear regression slope with a permutation test
# ("textbook" example)
# test statistic: slope
# Null hypothesis: slope = zero (random, white-noise data)
# Question: is the slope significantly (p < 0.05) different from zero?
x <- 1:10
#y0 = 1+2*x
#rand <- rnorm(n = 10, mean = 0, sd = 3)
#y = round(y0 +rand)
#
# y = c(-1 , 8 , 2 , 9, 14, 17, 16, 19, 26,27)
y = c(1, 16, 19, 26, 30)
x = c(1, 2, 3, 4, 5)
#plot with regression line
plot(y~x)
abline(lm(y~x))
coef(lm(y ~ x)) # slope = 6.8 (is this different from zero? Null hypothesis: slope = zero)
slope <- as.numeric(coef(lm(y ~ x))[2]) # slope = 6.8
# test statistic: slope
# permutate the "y" data
# short script to test the null hypothesis of the slope by permutation
mean (y); sd(y)
y.permsam1 <- sample(y, replace = FALSE)
mean(y.permsam1); sd(y.permsam1) # the permuted sample has the same mean and sd as the original data
slope.perm1 <- as.numeric(coef(lm(y.permsam1 ~ x))[2]) # slope of the permuted data
(slope.perm1) # the slope is a different, random number each time
#plot
plot(y.permsam1~x)
abline(lm(y.permsam1~x))
# As loop (permute many times)
NPerm = 5000 # takes a few seconds
outp.list0 = rep.int(NA,NPerm )
for (w in 1:NPerm) {
# x <- 1:10
# y = c(-1 , 8 , 2 , 9, 14, 17, 16, 19, 26,27)
y = c(1, 16, 19, 26, 30)
x = c(1, 2, 3, 4, 5)
y.permsam1 <- sample(y, replace = FALSE)
mean(y.permsam1); sd(y.permsam1) # the permuted sample has the same mean and sd as the original data
slope.perm1 <- as.numeric(coef(lm(y.permsam1 ~ x))[2]) # slope of the permuted data
outp.list0[w] <- slope.perm1
}
# Analyse outputs
# View(outp.list0)
hist(outp.list0, main = "Posterior distrib. for the SLOPE", col = "grey")
abline(v = slope, col = "green")
# 95% confidence interval for the set of permuted slopes (not for the original slope)
quantile(outp.list0, c(0.025, 0.975))
quantiles95 <- quantile(outp.list0, c(0.025, 0.975))
abline(v = quantiles95, col = "red")
# approximate "p" value (becomes more precise mith higher NPerm)
#Nlarger <- length ( outp.list0[outp.list0 >= quantile(outp.list0, 0.975)])
#Nsmaller <- length ( outp.list0[outp.list0 < quantile(outp.list0, 0.975)])
Nlarger <- length ( outp.list0[outp.list0 >= slope])
Nsmaller <- length ( outp.list0[outp.list0 < slope])
p_value <- Nlarger/Nsmaller # small "p" (around 0.008 with these 5 points): the slope is significant
p_value
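# Side note: Nlarger/Nsmaller approximates the usual one-sided permutation
# p-value when "p" is small. A more conventional estimate (a sketch, not the
# formula used above) divides by the total number of permutations, adding 1
# to numerator and denominator so the estimate can never be exactly zero:
p_conventional <- (Nlarger + 1) / (NPerm + 1)
p_conventional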
# compare this with the parametric "p" estimate (OK)
summary(lm (y ~x) )
##############
### Application to LFD analysis
## B. A permutation-based significance test for modal progression in LFD distributions
# test statistic: Phi' (growth performance index)
# Null hypothesis: Phi' of the LFD data = Phi' derived from randomly permuted LFD data
# Question: is Phi' significantly (p < 0.05) different from random?
library(TropFishR)
data(alba) # "alba": a very small and imperfect dataset (7 months only, few size bins, only 675 individuals)
# B.1 plot original LFD data
plot(alba, Fname = "catch", date.axis = "modern")
# B.2 one-time fit of the VBGF curve to the original LFD data
ELEFAN_GA(alba, popSize = 40, maxiter = 12,
MA = 9)
# B.3 Results of 5 fit runs: 5 slightly different values for Phi' (all around 2):
# $par$phiL: 2.030329, 2.009758, 1.983218, 1.978591, 1.965799
Phi.alba <- c(2.030329, 2.009758, 1.983218, 1.978591, 1.965799)
Phi_medvalue <- as.numeric(median(Phi.alba))
Phi_medvalue # 1.98
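# For reference: phiL is the Pauly & Munro growth performance index,
# Phi' = log10(K) + 2 * log10(Linf). A one-line helper (illustrative values,
# not the fitted "alba" parameters):
phi_prime <- function(Linf, K) log10(K) + 2 * log10(Linf)
phi_prime(Linf = 10, K = 1) # = 2, the same order as the median above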
# B.4 permutate the "catch" histograms
alba.perm1 <- alba # permuted version of the "alba" dataset
# B.5 loop that permutes the catch month by month
for (w in 1: ncol(alba.perm1$catch)){
alba.perm1$catch[,w] <- sample(alba$catch[,w], replace = FALSE)
}
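# Sanity check (not in the original script): the month-wise permutation keeps
# each month's total catch; only the assignment of counts to length bins changes.
all(colSums(alba.perm1$catch) == colSums(alba$catch)) # should be TRUE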
# B.6 plot the permuted ("messed up") LFD data
plot(alba.perm1, Fname = "catch", date.axis = "modern")
# B.7 one-time fit of the VBGF curve to the permuted LFD data
res1 <- ELEFAN_GA(alba.perm1, popSize = 40, maxiter = 10,
MA = 9)
res1
# B.8 Results of 5 fit runs on the permuted data: 5 different values for Phi' (all well below 2):
# $par$phiL: 1.042169, 1.044718, 1.237749, 1.202476, 1.746247
Phi.alba.perm1 <- c(1.042169, 1.044718, 1.237749, 1.202476, 1.746247)
Phi_medvalue.perm1 <- round(median(Phi.alba.perm1), 3)
Phi_medvalue.perm1
##############
## C. Full analysis as a loop with many permutations
# C.1. loop that permutes the catch month by month
# As loop (permute many times)
NPerm = 400 # takes a few minutes
Linf <- rep(NA, 1)
K <-rep(NA, 1)
t_anchor <-rep(NA, 1)
Phi_prime <- rep(NA, 1)
Rn_max <- rep(NA, 1)
dat.time <- rep(NA, 1) # current time (in seconds)
outp.table <- data.frame(Linf, K, t_anchor, Phi_prime, Rn_max,
dat.time)
# outp.list = rep.int(NA,NPerm )
for (w in 1:NPerm) {
alba.perm1 <- alba # permuted version of the "alba" dataset
for (i in 1: ncol(alba.perm1$catch)){
alba.perm1$catch[,i] <- sample(alba$catch[,i], replace = FALSE)
}
res1 <- ELEFAN_GA(alba.perm1, popSize = 40, maxiter = 8,
MA = 9, plot.score = FALSE)
print(res1$par$phiL) # progress output (bare expressions are not auto-printed inside a loop)
# outp.list[w] <- res1$par$phiL
outp.table[w,1] <- round(as.numeric(res1$par[1]), 2)
outp.table[w,2] <- round(as.numeric(res1$par[2]), 3)
outp.table[w,3] <- round(as.numeric(res1$par[3]), 3)
outp.table[w,4] <- round(as.numeric(res1$par[4]), 3)
outp.table[w,5] <- round(res1$Rn_max, 4)
outp.table[w,6] <- Sys.time()
}
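# The same month-by-month permutation is repeated verbatim in the sections
# below; a small helper (hypothetical name permute_lfq, not part of TropFishR)
# would avoid the duplication:
permute_lfq <- function(lfq) {
  lfq_perm <- lfq
  for (i in seq_len(ncol(lfq_perm$catch))) {
    lfq_perm$catch[, i] <- sample(lfq$catch[, i], replace = FALSE)
  }
  lfq_perm
}
# usage: alba.perm1 <- permute_lfq(alba)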
outp.table
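# Because column 6 stores a Sys.time() stamp per iteration, the approximate
# duration of each ELEFAN_GA run (in seconds, assuming numeric coercion of the
# timestamps) can be recovered afterwards:
summary(diff(outp.table$dat.time))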
# outp.list
outp.list <- outp.table$Phi_prime # choose data to be analyzed (e.g. Phi')
# C.2 export results
write.table(outp.table, file = "outp_list233alb_B2.csv", sep = ";", row.names = FALSE, col.names = FALSE)
outp.table <- read.table(file = "outp_list233alb_B2.csv", sep = ";", blank.lines.skip = TRUE)
outp.list <- as.numeric(outp.table$V4[1:234]) # column 4 (V4) holds Phi'
# C.3 plot histogram with the 95% confidence interval for the set of permuted Phi' values (red)
# and the original estimate for Phi' (green).
# If the result is significant, the green line should fall outside the red lines.
hist(outp.list, main = "Posterior distrib. for Phi' ")
d <- density(outp.list, adjust=0.9)
plot(d, type = "n", xlim = c(0,max(d$x)))
polygon(d, col = "wheat")
abline(v = Phi_medvalue, col = "blue")
hist(outp.list, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4), prob=TRUE)
a
abline(v = Phi.alba, col = "yellow")
abline(v = Phi_medvalue, col = "green")
# 95% confidence interval for Phi'
quantile(outp.list, c(0.025, 0.975)) # 0.6 to 2.2
# approximate "p" value (becomes more precise mith higher NPerm)
quantiles95 <- quantile(outp.list, c(0.025, 0.975))
abline(v = quantile(outp.list, c(0.95)), col = "red") # upper limit of 95% of the permuted values
quantiles95 # 95% Confid. Interval
text(3.25,0.5, " p=0.53, Nm=7months")
text(3.25,0.3, " NPerm=400")
# Calculate "p" that Phi' is different from (i.e, larger than ) a set of Phi' values generated by a random distribution.
# Analyse posterior distrib. , calculate the relative amount (percentage) of data that are equal or above the observed value (should be less than 5%)
Nlarger <- length ( outp.list[outp.list >= Phi_medvalue])
Nsmaller <- length ( outp.list[outp.list < Phi_medvalue])
p_value <- Nlarger/Nsmaller
p_value # 0.53 - the LFD distributions are NOT significantly different from "white noise"
# Conclusion: Phi' is NOT significantly (p = 0.53) different from Phi' obtained from "white noise", for the "alba" dataset
##############
# C.4 Now for the "synLF5b" subset of the synLFQ5 dataset:
# perfect synthetic data, 12 months (SLOW, low "p" value)
data(synLFQ5)
synLF5b <- synLFQ5 # select the first 12 months from synLFQ5
synLF5b$catch <- synLF5b$catch[,1:12]
synLF5b$dates <- synLF5b$dates[1:12]
sum(synLF5b$catch)
# C.4.a plot original LFD data
plot(synLF5b, Fname = "catch", date.axis = "modern")
# C.4.b one-time fit of the VBGF curve to the original LFD data
ELEFAN_GA(synLF5b, popSize = 40, maxiter = 20,
MA = 9)# takes about 11 seconds
# C.4.c Results of 5 fit runs: 5 slightly different values for Phi' (all around 3.5):
# $par$phiL: ...
Phi.syn5 <- c(3.537974, 3.499549, 3.492911, 3.504784, 3.500939)
Phi_medvalue <- as.numeric(median(Phi.syn5))
Phi_medvalue # ~3.50 (the Phi' originally used to simulate the data was 3.5)
# C.4d As loop (permute many times)
NPerm = 400 # takes a while, approx. 12 secs per run...
Linf <- rep(NA, 1)
K <-rep(NA, 1)
t_anchor <-rep(NA, 1)
Phi_prime <- rep(NA, 1)
Rn_max <- rep(NA, 1)
dat.time <- rep(NA, 1) # current time (in seconds)
outp.table3 <- data.frame(Linf, K, t_anchor, Phi_prime, Rn_max,
dat.time)
# outp.list3 = rep.int(NA,NPerm )
for (w in 1:NPerm) {
synLF5b <- synLFQ5
synLF5b$catch <- synLF5b$catch[,1:12]
synLF5b$dates <- synLF5b$dates[1:12]
synLF5b.perm1 <- synLF5b # permuted version of the "syn5" dataset
for (i in 1: ncol(synLF5b.perm1$catch)){
synLF5b.perm1$catch[,i] <- sample(synLF5b$catch[,i], replace = FALSE)
}
res1 <- ELEFAN_GA(synLF5b.perm1, popSize = 40, maxiter = 8,
MA = 9, plot.score = FALSE)
print(res1$par$phiL) # progress output (bare expressions are not auto-printed inside a loop)
# outp.list[w] <- res1$par$phiL
outp.table3[w,1] <- round(as.numeric(res1$par[1]), 2)
outp.table3[w,2] <- round(as.numeric(res1$par[2]), 3)
outp.table3[w,3] <- round(as.numeric(res1$par[3]), 3)
outp.table3[w,4] <- round(as.numeric(res1$par[4]), 3)
outp.table3[w,5] <- round(res1$Rn_max, 4)
outp.table3[w,6] <- Sys.time()
}
outp.table3
# outp.list3
outp.list3 <- outp.table3$Phi_prime # choose data to be analyzed (e.g. Phi')
outp.list3
# View(outp.list3)
# C.4.e export results
write.table(outp.table3, file = "outp_table3_syn5b_12M_400_Ciii.csv", sep = ";", row.names = FALSE, col.names = FALSE)
write.table(outp.list3, file = "outp.list3_syn5b_12M_400_Ciii.csv", sep = ";", row.names = FALSE, col.names = FALSE)
#outp.list3 <- read.table(file = "outp_list_syn272_Phi_p_onlyCXXII.csv", blank.lines.skip = TRUE)
# outp.list3 <- as.numeric(outp.list3$V1)
# C.4.f plot histogram with the 95% confidence interval for the set of permuted Phi' values (red)
# and the original estimate for Phi' (blue).
# If the result is significant, the blue line should fall outside the red lines.
hist(outp.list3, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4))
abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "blue")
d <- density(outp.list3, adjust=0.9)
plot(d, type = "n", xlim = c(0,max(d$x)))
polygon(d, col = "wheat")
abline(v = Phi_medvalue, col = "blue")
hist(outp.list3, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4), prob=TRUE)
abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "blue")
lines(d, col="blue", lwd=2) # add a density estimate with defaults
#lines(density(outp.list2, adjust=2), lty="dotted", col="darkgreen", lwd=2)
text(1,0.5, " p=0.0025, Nm=12months")
text(1,0.3, " NPerm=400")
# 95% confidence interval for Phi'
quantile(outp.list3, c(0.95)) # 95th percentile of the permuted Phi' values
abline(v = quantile(outp.list3, c(0.95)), col = "red")
# approximate "p" value (becomes more precise mith higher NPerm)
quantiles95 <- quantile(outp.list3, c(0.025, 0.975))
# abline(v = quantiles95, col = "red")
quantiles95 # 95% Confid. Interval
# Calculate "p" that Phi' is different from (i.e, larger than ) a set of Phi' values generated by a random distribution.
# Analyse posterior distrib. , calculate the relative amount (percentage) of data that are equal or above the observed value (should be less than 5%)
Nlarger <- length ( outp.list3[outp.list3 >= Phi_medvalue]) # raw data
Nsmaller <- length ( outp.list3[outp.list3 < Phi_medvalue]) # raw data
p_value <- Nlarger/Nsmaller # raw data "p" , OK!
p_value # 0 - the LFD distributions ARE significantly different from "white noise"
# Conclusion: Phi' is significantly (p < 0.0025, i.e. 0 of 400 permutations) different from Phi'
# obtained from "white noise", for the "synLF5b" dataset
NlargerD <- length(d$y[d$x >= Phi_medvalue]) # interpolated (density) data
NsmallerD <- length(d$y[d$x < Phi_medvalue]) # interpolated (density) data
p_valueD <- NlargerD/NsmallerD # interpolated data "p"
p_valueD #
# Problem: the tails of the density estimate depend on the subjective choice of the smoothing parameter
# Solution: use only the raw null distribution (no interpolation) and MANY permutations (> 400)
# Interesting observation: Phi' fitted to permuted ("white noise") data lies between 2 and 3.5, never near zero!
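# How many permutations are enough? The Monte-Carlo standard error of a
# permutation "p" estimate shrinks with 1/sqrt(NPerm); a rough sketch
# (assuming binomial sampling of exceedances, illustrative values only):
p <- 0.05; NPerm <- 400
sqrt(p * (1 - p) / NPerm) # ~0.011, so p = 0.05 is known only to about +/- 0.01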
##############
# D. Now for the "synLF5c" subset of the synLFQ5 dataset:
# perfect synthetic data, but 8 months only (SLOW, low "p" value)
synLF5c <- synLFQ5 # select 8 months from synLFQ5
synLF5c$catch <- synLF5c$catch[,c(1,2,3,5,7,9,10,12)]
synLF5c$dates <- synLF5c$dates[c(1,2,3,5,7,9,10,12)]
# D.4.a plot original LFD data
plot(synLF5c, Fname = "catch", date.axis = "modern")
# D.4.b one-time fit of the VBGF curve to the original LFD data
ELEFAN_GA(synLF5c, popSize = 40, maxiter = 20,
MA = 9)# takes about 7 seconds
# D.4.c Results of 5 fit runs: 5 slightly different values for Phi' (most around 3.5):
# $par$phiL: ...
Phi.syn5 <- c(3.550812, 3.538186, 3.531929, 3.208253, 3.504487)
Phi_medvalue <- as.numeric(median(Phi.syn5))
Phi_medvalue # 3.53 (the Phi' originally used to simulate the data was 3.5)
# D.4d As loop (permute many times)
NPerm = 300 # takes a while, approx. 8 secs per run...
Linf <- rep(NA, 1)
K <-rep(NA, 1)
t_anchor <-rep(NA, 1)
Phi_prime <- rep(NA, 1)
Rn_max <- rep(NA, 1)
dat.time <- rep(NA, 1) # current time (in seconds)
outp.table4 <- data.frame(Linf, K, t_anchor, Phi_prime, Rn_max,
dat.time)
# outp.list4 = rep.int(NA,NPerm )
for (w in 1:NPerm) {
synLF5c <- synLFQ5
synLF5c$catch <- synLF5c$catch[,c(1,2,3,5,7,9,10,12)]
synLF5c$dates <- synLF5c$dates[c(1,2,3,5,7,9,10,12)]
synLF5c.perm1 <- synLF5c # permuted version of the "syn5" dataset
for (i in 1: ncol(synLF5c.perm1$catch)){
synLF5c.perm1$catch[,i] <- sample(synLF5c$catch[,i], replace = FALSE)
}
res1 <- ELEFAN_GA(synLF5c.perm1, popSize = 40, maxiter = 8,
MA = 9, plot.score = FALSE)
print(res1$par$phiL) # progress output (bare expressions are not auto-printed inside a loop)
# outp.list[w] <- res1$par$phiL
outp.table4[w,1] <- round(as.numeric(res1$par[1]), 2)
outp.table4[w,2] <- round(as.numeric(res1$par[2]), 3)
outp.table4[w,3] <- round(as.numeric(res1$par[3]), 3)
outp.table4[w,4] <- round(as.numeric(res1$par[4]), 3)
outp.table4[w,5] <- round(res1$Rn_max, 4)
outp.table4[w,6] <- Sys.time()
}
outp.table4
# outp.list4
outp.list4 <- outp.table4$Phi_prime # choose data to be analyzed (e.g. Phi')
outp.list4
summary(outp.list4)
# D.5 export results
write.table(outp.table4, file = "outp_table4cc_syn5c300_CiV.csv", sep = ";", row.names = FALSE, col.names = FALSE)
write.table(outp.list4, file = "outp_list4cc_syn5c300_CiV.csv", sep = ";", row.names = FALSE, col.names = FALSE)
outp.list4.o <- read.table(file = "outp_list4cc_syn5c300_CiV.csv", sep = ";", blank.lines.skip = TRUE)
# outp.list4 <- as.numeric(outp.list4$V1[1:234])
# D.6 plot histogram with the 95% confidence interval for the set of permuted Phi' values (red)
# and the original estimate for Phi' (blue).
# If the result is significant, the blue line should fall outside the red lines.
hist(outp.list4, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4))
abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "blue")
d <- density(outp.list4, adjust=0.9)
plot(d, type = "n", xlim = c(0,max(d$x)))
polygon(d, col = "wheat")
abline(v = Phi_medvalue, col = "blue")
hist(outp.list4, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4), prob=TRUE)
abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "blue")
lines(d, col="blue", lwd=2) # add a density estimate with defaults
#lines(density(outp.list2, adjust=2), lty="dotted", col="darkgreen", lwd=2)
# 95% confidence interval for Phi'
quantile(outp.list4, c(0.025, 0.975)) # 95% interval of the permuted Phi' values
# approximate "p" value (becomes more precise mith higher NPerm)
quantiles95 <- quantile(outp.list4, c(0.025, 0.975))
abline(v = quantiles95, col = "red")
quantiles95 # 95% Confid. Interval
# D.8 Calculate "p" that Phi' is different from (i.e, larger than ) a set of Phi' values generated by a random distribution.
# Analyse posterior distrib. , calculate the relative amount (percentage) of data that are equal or above the observed value (should be less than 5%)
Nlarger <- length ( outp.list4[outp.list4 >= Phi_medvalue]) # raw data
Nsmaller <- length ( outp.list4[outp.list4 < Phi_medvalue]) # raw data
p_value <- Nlarger/Nsmaller # raw data "p"
p_value # 0.003 - the LFD distributions ARE significantly different from "white noise"
# Conclusion: Phi' is significantly (p < 0.01) different from Phi' obtained from "white noise"
# for the "synLF5c" dataset
NlargerD <- length(d$y[d$x >= Phi_medvalue]) # interpolated (density) data
NsmallerD <- length(d$y[d$x < Phi_medvalue]) # interpolated (density) data
p_valueD <- NlargerD/NsmallerD # interpolated data "p"
p_valueD # 0 - also significant according to the interpolated density
# Problem: the tails of the density estimate depend on the subjective choice of the smoothing parameter
# Solution: use only the raw null distribution (no interpolation) and MANY permutations (> 400)
##############
# E. Now for the "synLF5d" subset of the synLFQ5 dataset:
# perfect synthetic data, but 4 months only (SLOW, low "p" value)
synLF5d <- synLFQ5 # select 4 months from synLFQ5
synLF5d$catch <- synLF5d$catch[,c(1,3,7,12)]
synLF5d$dates <- synLF5d$dates[c(1,3, 7, 12)]
# E.a plot original LFD data
plot(synLF5d, Fname = "catch", date.axis = "modern")
# E.b one-time fit of the VBGF curve to the original LFD data
ELEFAN_GA(synLF5d, popSize = 40, maxiter = 20,
MA = 9)# takes about 5 seconds
# E.c Results of 5 fit runs: 5 slightly different values for Phi' (most around 3.5):
# $par$phiL: ...
Phi.syn5 <- c(2.951843, 3.224271, 3.530952, 3.534409, 3.50088)
Phi_medvalue <- as.numeric(median(Phi.syn5))
Phi_medvalue # 3.50088 (the Phi' originally used to simulate the data was 3.5)
# E.d As loop (permute many times)
NPerm = 400 # takes a while, approx. 6 secs per run...
Linf <- rep(NA, 1)
K <-rep(NA, 1)
t_anchor <-rep(NA, 1)
Phi_prime <- rep(NA, 1)
Rn_max <- rep(NA, 1)
dat.time <- rep(NA, 1) # current time (in seconds)
outp.table4d <- data.frame(Linf, K, t_anchor, Phi_prime, Rn_max,
dat.time)
# outp.list4d = rep.int(NA,NPerm )
for (w in 1:NPerm) {
synLF5d <- synLFQ5 # select 4 months from synLFQ5
synLF5d$catch <- synLF5d$catch[,c(1,3,7,12)]
synLF5d$dates <- synLF5d$dates[c(1,3,7,12)]
synLF5d.perm1 <- synLF5d # permuted version of the "syn5" dataset
for (i in 1: ncol(synLF5d.perm1$catch)){
synLF5d.perm1$catch[,i] <- sample(synLF5d$catch[,i], replace = FALSE)
}
res1 <- ELEFAN_GA(synLF5d.perm1, popSize = 40, maxiter = 8,
MA = 9, plot.score = FALSE)
print(res1$par$phiL) # progress output (bare expressions are not auto-printed inside a loop)
# outp.list[w] <- res1$par$phiL
outp.table4d[w,1] <- round(as.numeric(res1$par[1]), 2)
outp.table4d[w,2] <- round(as.numeric(res1$par[2]), 3)
outp.table4d[w,3] <- round(as.numeric(res1$par[3]), 3)
outp.table4d[w,4] <- round(as.numeric(res1$par[4]), 3)
outp.table4d[w,5] <- round(res1$Rn_max, 4)
outp.table4d[w,6] <- Sys.time()
}
outp.table4d
# outp.list4d
outp.list4d <- outp.table4d$Phi_prime # choose data to be analyzed (e.g. Phi')
outp.list4d
summary(outp.list4d)
# E.e export results
write.table(outp.table4d, file = "outp_table4d_syn5d400_CiV.csv", sep = ";", row.names = FALSE, col.names = FALSE)
write.table(outp.list4d, file = "outp_list4d_syn5d400_CiV.csv", sep = ";", row.names = FALSE, col.names = FALSE)
outp.list4d.o <- read.table(file = "outp_list4d_syn5d400_CiV.csv", sep = ";", blank.lines.skip = TRUE)
# outp.list4d <- as.numeric(outp.list4d$V1[1:234])
# E.f plot histogram with the 95% confidence interval for the set of permuted Phi' values (red)
# and the original estimate for Phi' (blue).
# If the result is significant, the blue line should fall outside the red lines.
hist(outp.list4d, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4))
abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "blue")
d <- density(outp.list4d, adjust=0.9)
plot(d, type = "n", xlim = c(0,max(d$x)))
polygon(d, col = "wheat")
abline(v = Phi_medvalue, col = "blue")
hist(outp.list4d, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4), prob=TRUE)
abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "blue")
lines(d, col="blue", lwd=2) # add a density estimate with defaults
#lines(density(outp.list2, adjust=2), lty="dotted", col="darkgreen", lwd=2)
# 95% confidence interval for Phi'
quantile(outp.list4d, c(0.025, 0.975)) # 95% interval of the permuted Phi' values
# approximate "p" value (becomes more precise mith higher NPerm)
quantiles95 <- quantile(outp.list4d, c(0.025, 0.975))
abline(v = quantiles95, col = "red")
quantiles95 # 95% Confid. Interval
# E.g Calculate "p" that Phi' is different from (i.e, larger than ) a set of Phi' values generated by a random distribution.
# Analyse posterior distrib. , calculate the relative amount (percentage) of data that are equal or above the observed value (should be less than 5%)
Nlarger <- length ( outp.list4d[outp.list4d >= Phi_medvalue]) # raw data
Nsmaller <- length ( outp.list4d[outp.list4d < Phi_medvalue]) # raw data
p_value <- Nlarger/Nsmaller # raw data "p"
p_value # 0.028 - the LFD distributions ARE significantly different from "white noise"
# Conclusion: Phi' is significantly (p < 0.05) different from Phi' obtained from "white noise"
# for the "synLF5d" dataset
NlargerD <- length(d$y[d$x >= Phi_medvalue]) # interpolated (density) data
NsmallerD <- length(d$y[d$x < Phi_medvalue]) # interpolated (density) data
p_valueD <- NlargerD/NsmallerD # interpolated data "p"
p_valueD # 0.24 - NOT significant according to the interpolated density (contradicting the raw "p")
# Problem: the tails of the density estimate depend on the subjective choice of the smoothing parameter
# Solution: use only the raw null distribution (no interpolation) and MANY permutations (> 400)
##############
# F. Now for the "synLF5e" subset of the synLFQ5 dataset:
# perfect synthetic data, but 3 months only (FAST, high "p" value)
library(TropFishR)
library(parallel)
synLF5e <- synLFQ5 # select 3 months from synLFQ5
synLF5e$catch <- synLF5e$catch[,c(1,3,12)]
synLF5e$dates <- synLF5e$dates[c(1,3, 12)]
sum(synLF5e$catch) # 11,815 individuals
# F.a plot original LFD data
plot(synLF5e, Fname = "catch", date.axis = "modern")
# F.b one-time fit of the VBGF curve to the original LFD data
ELEFAN_GA(synLF5e, popSize = 40, maxiter = 20,
MA = 9)# takes about 5-6 seconds
# F.c Results of 6 fit runs: 6 slightly different values for Phi' (most around 3.2-3.5):
# $par$phiL: ...
Phi.syn5 <- c(3.236708, 3.239536, 3.514382, 3.262346, 3.250434, 3.191535)
Phi_medvalue <- as.numeric(median(Phi.syn5))
Phi_medvalue # 3.24 (the Phi' originally used to simulate the data was 3.5)
# F.d As loop (permute many times)
NPerm = 400 # takes a while, approx. 6 secs per run...
Linf <- rep(NA, 1)
K <-rep(NA, 1)
t_anchor <-rep(NA, 1)
Phi_prime <- rep(NA, 1)
Rn_max <- rep(NA, 1)
dat.time <- rep(NA, 1) # current time (in seconds)
outp.table5d <- data.frame(Linf, K, t_anchor, Phi_prime, Rn_max,
dat.time)
# outp.list4d = rep.int(NA,NPerm )
for (w in 1:NPerm) {
synLF5e <- synLFQ5 # select 3 months from synLFQ5
synLF5e$catch <- synLF5e$catch[,c(1,3,12)]
synLF5e$dates <- synLF5e$dates[c(1,3, 12)]
synLF5e.perm1 <- synLF5e # permuted version of the "syn5" dataset
for (i in 1: ncol(synLF5e.perm1$catch)){
synLF5e.perm1$catch[,i] <- sample(synLF5e$catch[,i], replace = FALSE)
}
res1 <- ELEFAN_GA(synLF5e.perm1, popSize = 40, maxiter = 8,
MA = 9, plot.score = FALSE)
print(res1$par$phiL) # progress output (bare expressions are not auto-printed inside a loop)
# outp.list[w] <- res1$par$phiL
outp.table5d[w,1] <- round(as.numeric(res1$par[1]), 2)
outp.table5d[w,2] <- round(as.numeric(res1$par[2]), 3)
outp.table5d[w,3] <- round(as.numeric(res1$par[3]), 3)
outp.table5d[w,4] <- round(as.numeric(res1$par[4]), 3)
outp.table5d[w,5] <- round(res1$Rn_max, 4)
outp.table5d[w,6] <- Sys.time()
}
outp.table5d
outp.list5d <- outp.table5d$Phi_prime # choose data to be analyzed (e.g. Phi')
#outp.list5d <- outp.table5d # choose data to be analyzed (e.g. Phi')
# outp.list5d <- as.numeric(outp.table5d$V1)
outp.list5d
summary(outp.list5d)
# F.e export results
write.table(outp.table5d, file = "outp_table5d_syn5d_400_3months_CVii.csv", sep = ";", row.names = FALSE, col.names = FALSE)
write.table(outp.list5d, file = "outp_listd_syn5d_400_3monthsdCVii.csv", sep = ";", row.names = FALSE, col.names = FALSE)
outp.list5d <- read.table(file = "outp_listd_syn5d_400_3monthsdCVii.csv", sep = ";", blank.lines.skip = TRUE)
outp.list5d <- as.numeric(outp.list5d$V1)
# F.f plot histogram with the 95% confidence interval for the set of permuted Phi' values (red)
# and the original estimate for Phi' (green).
# If the result is significant, the green line should fall outside the red lines.
hist(outp.list5d, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4),
xlab = "Phi'")
abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "blue")
d <- density(outp.list5d, adjust=0.9)
plot(d, type = "n", xlim = c(0,max(d$x)))
polygon(d, col = "wheat")
abline(v = Phi_medvalue, col = "blue")
hist(outp.list5d, main = "Posterior distrib. for Phi' ", col = "grey", xlim = c(0,4), prob=TRUE,
xlab = "Phi'")
# abline(v = Phi.syn5, col = "yellow")
abline(v = Phi_medvalue, col = "green")
lines(d, col="blue", lwd=2) # add a density estimate with defaults
#lines(density(outp.list2, adjust=2), lty="dotted", col="darkgreen", lwd=2)
# 95% confidence interval for Phi'
quantile(outp.list5d, c(0.025, 0.975)) # 95% interval of the permuted Phi' values
abline(v = quantile(outp.list5d, c(0.95)), col = "red") # red line: 95th percentile of the permuted values
# approximate "p" value (becomes more precise mith higher NPerm)
quantiles95 <- quantile(outp.list5d, c(0.025, 0.975))
# abline(v = quantiles95, col = "red")
quantiles95 # 95% Confid. Interval
# F.g Calculate "p" that Phi' is different from (i.e, larger than ) a set of Phi' values generated by a random distribution.
# Analyse posterior distrib. , calculate the relative amount (percentage) of data that are equal or above the observed value (should be less than 5%)
Nlarger <- length ( outp.list5d[outp.list5d >= Phi_medvalue]) # raw data
Nsmaller <- length ( outp.list5d[outp.list5d < Phi_medvalue]) # raw data
p_value <- Nlarger/Nsmaller # raw data "p"
p_value # 0.2 - the LFD distributions are NOT significantly different from "white noise"
text(1,0.5, " p=0.205, Nm=3months")
text(1,0.3, " NPerm=400")
# Conclusion: Phi' is NOT significantly (p = 0.2) different from Phi' obtained from "white noise"
# for the "synLF5e" dataset
NlargerD <- length(d$y[d$x >= Phi_medvalue]) # interpolated (density) data
NsmallerD <- length(d$y[d$x < Phi_medvalue]) # interpolated (density) data
p_valueD <- NlargerD/NsmallerD # interpolated data "p"
p_valueD # 0.51 - NOT significant according to the interpolated density either
# IMPORTANT: use only the raw null distribution (no interpolation) and MANY permutations (> 400).
# Do not use kernel density smoothing to compute "p": the tails of the density estimate depend on the subjective choice of the smoothing parameter.
# Use kernel density smoothing for graphical purposes only.