#install.packages("bestNormalize")
library("bestNormalize")

# Collect the trait tables in the working directory. `pattern` is a regular
# expression, so anchor it to the ".txt" extension rather than matching any
# file name that merely contains the letters "txt".
all <- list.files(pattern = "\\.txt$")
length(all)

## [1] 159

# Shapiro-Wilk normality test on the raw V3 column of files 119 to 125.
# NOTE(review): these positions are hard-coded against the listing above and
# shift whenever files are added to or removed from the directory (the
# "transf.*.txt" tables written further down also end in ".txt").
for (i in 119:125) {
  print(all[i])
  # TRUE spelled out: `T` is an ordinary variable that can be reassigned.
  my.data <- read.table(all[i], fill = TRUE)
  print(shapiro.test(my.data$V3))
}

## [1] "TPA.TUE.10to14.S.txt"
## 
##  Shapiro-Wilk normality test
## 
## data:  my.data$V3
## W = 0.98777, p-value = 0.07571
## 
## [1] "TPA.TUE.6to9.C.txt"
## 
##  Shapiro-Wilk normality test
## 
## data:  my.data$V3
## W = 0.99571, p-value = 0.8338
## 
## [1] "TPA.TUE.6to9.S.txt"
## 
##  Shapiro-Wilk normality test
## 
## data:  my.data$V3
## W = 0.9921, p-value = 0.3342
## 
## [1] "TPA.TUE.p3top1.C.txt"
## 
##  Shapiro-Wilk normality test
## 
## data:  my.data$V3
## W = 0.90186, p-value = 2.138e-10
## 
## [1] "TPA.TUE.p3top1.S.txt"
## 
##  Shapiro-Wilk normality test
## 
## data:  my.data$V3
## W = 0.37547, p-value < 2.2e-16
## 
## [1] "TPA.WaterK.S.txt"
## 
##  Shapiro-Wilk normality test
## 
## data:  my.data$V3
## W = 0.98898, p-value = 0.1163
## 
## [1] "TPA.WaterNa.S.txt"
## 
##  Shapiro-Wilk normality test
## 
## data:  my.data$V3
## W = 0.97013, p-value = 0.00024

# AGR ----

# Read TPA.AGR.0to5.C, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.AGR.0to5.C.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 206 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 0.9598
##  - Box-Cox: 0.9422
##  - Center+scale: 1.005
##  - Double Reversed Log_b(x+a): 1.4982
##  - Exp(x): 25.0398
##  - Log_b(x+a): 0.9598
##  - orderNorm (ORQ): 1.095
##  - sqrt(x + a): 0.9627
##  - Yeo-Johnson: 0.9422
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Box Cox Transformation with 206 nonmissing obs.:
##  Estimated statistics:
##  - lambda = 0.2510099 
##  - mean (before standardization) = 5.860783 
##  - sd (before standardization) = 0.6394679

# asinh is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
MASS::truehist(asinh(mdata$V3), nbins = 12)

shapiro.test(asinh(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  asinh(mdata$V3)
## W = 0.99512, p-value = 0.7494

mdata$V3 <- asinh(mdata$V3)
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.AGR.0to5.C.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Read TPA.AGR.0to5.S, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.AGR.0to5.S.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.0455
##  - Box-Cox: 1.1023
##  - Center+scale: 1.1188
##  - Double Reversed Log_b(x+a): 1.7542
##  - Exp(x): 23.9328
##  - Log_b(x+a): 1.0455
##  - orderNorm (ORQ): 1.2593
##  - sqrt(x + a): 1.1272
##  - Yeo-Johnson: 1.1023
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized asinh(x) Transformation with 205 nonmissing obs.:
##  Relevant statistics:
##  - mean (before standardization) = 3.882789 
##  - sd (before standardization) = 0.2569503

# sqrt is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
MASS::truehist(sqrt(mdata$V3), nbins = 12)

shapiro.test(sqrt(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(mdata$V3)
## W = 0.99598, p-value = 0.8698

mdata$V3 <- sqrt(mdata$V3)
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.AGR.0to5.S.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Read TPA.AGR.0to9.C, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.AGR.0to9.C.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 206 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 0.8982
##  - Box-Cox: 0.9022
##  - Center+scale: 0.9105
##  - Double Reversed Log_b(x+a): 1.4282
##  - Exp(x): 26.1508
##  - Log_b(x+a): 0.8982
##  - orderNorm (ORQ): 1.087
##  - sqrt(x + a): 0.8945
##  - Yeo-Johnson: 0.9092
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized sqrt(x + a) Transformation with 206 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - mean (before standardization) = 7.324105 
##  - sd (before standardization) = 0.9590447

# asinh is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
MASS::truehist(asinh(mdata$V3), nbins = 12)

shapiro.test(asinh(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  asinh(mdata$V3)
## W = 0.99669, p-value = 0.9415

mdata$V3 <- asinh(mdata$V3)
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.AGR.0to9.C.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Read TPA.AGR.0to9.S, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.AGR.0to9.S.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.1785
##  - Box-Cox: 1.1935
##  - Center+scale: 1.2217
##  - Double Reversed Log_b(x+a): 2.0678
##  - Exp(x): 24.291
##  - Log_b(x+a): 1.1752
##  - orderNorm (ORQ): 1.3162
##  - sqrt(x + a): 1.0643
##  - Yeo-Johnson: 1.1802
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized sqrt(x + a) Transformation with 205 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - mean (before standardization) = 5.400424 
##  - sd (before standardization) = 0.7229767

# asinh is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
MASS::truehist(asinh(mdata$V3), nbins = 12)

shapiro.test(asinh(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  asinh(mdata$V3)
## W = 0.99522, p-value = 0.7665

mdata$V3 <- asinh(mdata$V3)
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.AGR.0to9.S.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Read TPA.AGR.6to9.C, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.AGR.6to9.C.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 206 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 0.997
##  - Box-Cox: 1.0588
##  - Center+scale: 1.0162
##  - Double Reversed Log_b(x+a): 2.2972
##  - Exp(x): 25.202
##  - Log_b(x+a): 0.997
##  - orderNorm (ORQ): 1.1803
##  - sqrt(x + a): 1.0048
##  - Yeo-Johnson: 1.0588
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized asinh(x) Transformation with 206 nonmissing obs.:
##  Relevant statistics:
##  - mean (before standardization) = 5.108199 
##  - sd (before standardization) = 0.2589609

# asinh is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
MASS::truehist(asinh(mdata$V3), nbins = 12)

shapiro.test(asinh(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  asinh(mdata$V3)
## W = 0.99639, p-value = 0.914

mdata$V3 <- asinh(mdata$V3)
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.AGR.6to9.C.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Read TPA.AGR.6to9.S, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.AGR.6to9.S.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 0.9325
##  - Box-Cox: 1.0485
##  - Center+scale: 1.2243
##  - Double Reversed Log_b(x+a): 2.0955
##  - Exp(x): 25.2075
##  - Log_b(x+a): 0.9325
##  - orderNorm (ORQ): 1.1198
##  - sqrt(x + a): 1.0573
##  - Yeo-Johnson: 1.0485
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized asinh(x) Transformation with 205 nonmissing obs.:
##  Relevant statistics:
##  - mean (before standardization) = 4.375408 
##  - sd (before standardization) = 0.2714592

# asinh is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
MASS::truehist(asinh(mdata$V3), nbins = 12)

shapiro.test(asinh(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  asinh(mdata$V3)
## W = 0.99357, p-value = 0.5179

mdata$V3 <- asinh(mdata$V3)
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.AGR.6to9.S.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Read TPA.AGR.10to14.C, plot the raw V3 histogram and rank candidate
# transforms. fill = TRUE lets read.table accept rows with missing fields;
# it is spelled out because `T` is an ordinary, reassignable variable.
mdata <- read.table("TPA.AGR.10to14.C.txt", fill = TRUE)
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 203 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 0.9808
##  - Box-Cox: 0.9985
##  - Center+scale: 1.1507
##  - Double Reversed Log_b(x+a): 1.6383
##  - Exp(x): 22.6748
##  - Log_b(x+a): 0.9808
##  - orderNorm (ORQ): 1.072
##  - sqrt(x + a): 1.0613
##  - Yeo-Johnson: 0.9985
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized asinh(x) Transformation with 203 nonmissing obs.:
##  Relevant statistics:
##  - mean (before standardization) = 5.638954 
##  - sd (before standardization) = 0.2323991

# sqrt is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
MASS::truehist(sqrt(mdata$V3), nbins = 12)

shapiro.test(sqrt(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(mdata$V3)
## W = 0.99692, p-value = 0.9608

mdata$V3 <- sqrt(mdata$V3)
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.AGR.10to14.C.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Read TPA.AGR.10to14.S, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.AGR.10to14.S.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.2315
##  - Box-Cox: 1.2487
##  - Center+scale: 1.21
##  - Double Reversed Log_b(x+a): 1.6747
##  - Exp(x): 26.2172
##  - Log_b(x+a): 1.2315
##  - orderNorm (ORQ): 1.4463
##  - sqrt(x + a): 1.2802
##  - Yeo-Johnson: 1.2487
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## center_scale(x) Transformation with 205 nonmissing obs.
##  Estimated statistics:
##  - mean (before standardization) = 65.84115 
##  - sd (before standardization) = 16.93093

# sqrt is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
MASS::truehist(sqrt(mdata$V3), nbins = 12)

shapiro.test(sqrt(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(mdata$V3)
## W = 0.99679, p-value = 0.9499

mdata$V3 <- sqrt(mdata$V3)
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.AGR.10to14.S.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Read TPA.AGR.p3top1.C, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.AGR.p3top1.C.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 206 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.1155
##  - Box-Cox: 0.9818
##  - Center+scale: 1.1227
##  - Double Reversed Log_b(x+a): 1.2148
##  - Exp(x): 20.8075
##  - Log_b(x+a): 1.1255
##  - orderNorm (ORQ): 1.25
##  - sqrt(x + a): 1.0158
##  - Yeo-Johnson: 0.9952
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Box Cox Transformation with 206 nonmissing obs.:
##  Estimated statistics:
##  - lambda = 0.4862448 
##  - mean (before standardization) = 4.728521 
##  - sd (before standardization) = 1.098106

# sqrt is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
MASS::truehist(sqrt(mdata$V3), nbins = 12)

shapiro.test(sqrt(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(mdata$V3)
## W = 0.99733, p-value = 0.9804

mdata$V3 <- sqrt(mdata$V3)
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.AGR.p3top1.C.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Read TPA.AGR.p3top1.S, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.AGR.p3top1.S.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.3333
##  - Box-Cox: 1.2223
##  - Center+scale: 1.158
##  - Double Reversed Log_b(x+a): 2.1497
##  - Exp(x): 21.799
##  - Log_b(x+a): 1.3333
##  - orderNorm (ORQ): 1.2633
##  - sqrt(x + a): 1.198
##  - Yeo-Johnson: 1.2083
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## center_scale(x) Transformation with 205 nonmissing obs.
##  Estimated statistics:
##  - mean (before standardization) = 11.64849 
##  - sd (before standardization) = 3.875888

# sqrt is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
MASS::truehist(sqrt(mdata$V3), nbins = 12)

shapiro.test(sqrt(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(mdata$V3)
## W = 0.99523, p-value = 0.7689

mdata$V3 <- sqrt(mdata$V3)
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.AGR.p3top1.S.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# RGR ----

# Read TPA.RGR.0to9.S, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.RGR.0to9.S.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.151
##  - Box-Cox: 1.1598
##  - Center+scale: 1.151
##  - Double Reversed Log_b(x+a): 1.6381
##  - Exp(x): 1.1947
##  - Log_b(x+a): 1.2318
##  - orderNorm (ORQ): 1.2387
##  - sqrt(x + a): 1.1753
##  - Yeo-Johnson: 1.1963
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized asinh(x) Transformation with 205 nonmissing obs.:
##  Relevant statistics:
##  - mean (before standardization) = 0.1768424 
##  - sd (before standardization) = 0.01514213

# Try exp() on the trait: histogram first, then Shapiro-Wilk on the same values.
MASS::truehist(data = exp(mdata$V3), nbins = 12)

shapiro.test(x = exp(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  exp(mdata$V3)
## W = 0.97866, p-value = 0.003291

# Read TPA.RGR.6to9.C, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.RGR.6to9.C.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 206 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.5412
##  - Box-Cox: 1.7832
##  - Center+scale: 1.5308
##  - Double Reversed Log_b(x+a): 1.3411
##  - Exp(x): 1.4835
##  - Log_b(x+a): 1.69
##  - orderNorm (ORQ): 1.4298
##  - sqrt(x + a): 1.6337
##  - Yeo-Johnson: 1.6725
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized double reversed Log_b(x + a) Transformation with 206 nonmissing obs.:
##  Relevant statistics:
##  - a = 
##  - b = 10 
##  - max(x) = 0.2599606 ; min(x) = 0.09644391 
##  - mean (before standardization) = 0.4153716 
##  - sd (before standardization) = 0.09823594

# Ordered-quantile (ORQ) normalisation of the trait.
# The earlier version wrapped V3 in as.data.frame() only to unlist() it again,
# and then passed the transformed values through predict(..., inverse = TRUE),
# which maps them straight back to the original scale -- so the histogram and
# the Shapiro-Wilk test were run on the untransformed data. Test the
# transformed values instead.
trait <- mdata$V3
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# predict() with no newdata returns the ORQ-transformed training values.
x2 <- predict(trait2)
MASS::truehist(x2, nbins = 12)

shapiro.test(x2)

# NOTE(review): the result recorded below predates this fix (it reflects the
# back-transformed, i.e. original, values); re-run to refresh it.
## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.93016, p-value = 2.385e-08

# Read TPA.RGR.6to9.S, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.RGR.6to9.S.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.0277
##  - Box-Cox: 1.0773
##  - Center+scale: 1.031
##  - Double Reversed Log_b(x+a): 2.0095
##  - Exp(x): 1.0188
##  - Log_b(x+a): 1.1005
##  - orderNorm (ORQ): 1.1048
##  - sqrt(x + a): 1.0593
##  - Yeo-Johnson: 1.0533
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized exp(x) Transformation with 205 nonmissing obs.:
##  Relevant statistics:
##  - mean (before standardization) = 1.15412 
##  - sd (before standardization) = 0.01679122

# Ordered-quantile (ORQ) normalisation of the trait.
# The earlier version wrapped V3 in as.data.frame() only to unlist() it again,
# and then passed the transformed values through predict(..., inverse = TRUE),
# which maps them straight back to the original scale -- so the histogram and
# the Shapiro-Wilk test were run on the untransformed data. Test the
# transformed values instead.
trait <- mdata$V3
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# predict() with no newdata returns the ORQ-transformed training values.
x2 <- predict(trait2)
MASS::truehist(x2, nbins = 12)

shapiro.test(x2)

# NOTE(review): the result recorded below predates this fix (it reflects the
# back-transformed, i.e. original, values); re-run to refresh it.
## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.98196, p-value = 0.009882

# Read TPA.RGR.10to14.C, plot the raw V3 histogram and rank candidate
# transforms. fill = TRUE lets read.table accept rows with missing fields;
# it is spelled out because `T` is an ordinary, reassignable variable.
mdata <- read.table("TPA.RGR.10to14.C.txt", fill = TRUE)
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 203 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.4043
##  - Box-Cox: 1.4328
##  - Center+scale: 1.3903
##  - Double Reversed Log_b(x+a): 2.037
##  - Exp(x): 1.4082
##  - Log_b(x+a): 1.3105
##  - orderNorm (ORQ): 1.3355
##  - sqrt(x + a): 1.3692
##  - Yeo-Johnson: 1.4472
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Log_b(x + a) Transformation with 203 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - b = 10 
##  - mean (before standardization) = -0.8181692 
##  - sd (before standardization) = 0.0281417

# Fit a Box-Cox transform, take its transformed values, then plot and test them.
trait <- boxcox(x = mdata$V3)
trait2 <- predict(object = trait)
MASS::truehist(data = trait2, nbins = 12)

shapiro.test(x = trait2)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait2
## W = 0.98307, p-value = 0.01529

# Read TPA.RGR.10to14.S, plot the raw V3 histogram and rank candidate
# transforms. fill = TRUE lets read.table accept rows with missing fields;
# it is spelled out because `T` is an ordinary, reassignable variable.
mdata <- read.table("TPA.RGR.10to14.S.txt", fill = TRUE)
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.2105
##  - Box-Cox: 1.222
##  - Center+scale: 1.2278
##  - Double Reversed Log_b(x+a): 1.6187
##  - Exp(x): 1.2067
##  - Log_b(x+a): 1.2553
##  - orderNorm (ORQ): 1.3027
##  - sqrt(x + a): 1.2312
##  - Yeo-Johnson: 1.1737
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 205 nonmissing obs.:
##  Estimated statistics:
##  - lambda = -4.99994 
##  - mean (before standardization) = 0.08803441 
##  - sd (before standardization) = 0.004882154

# Try exp() on the trait: keep the values in `trait`, then plot and test them.
trait <- exp(x = mdata$V3)
MASS::truehist(data = trait, nbins = 12)

shapiro.test(x = trait)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait
## W = 0.98087, p-value = 0.006843

# TUE ----

# Read TPA.TUE.0to5.S, plot the raw V3 histogram and rank candidate
# transforms. fill = TRUE lets read.table accept rows with missing fields;
# it is spelled out because `T` is an ordinary, reassignable variable.
mdata <- read.table("TPA.TUE.0to5.S.txt", fill = TRUE)
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.0515
##  - Box-Cox: 1.0555
##  - Center+scale: 1.0107
##  - Double Reversed Log_b(x+a): 2.0432
##  - Exp(x): 1.4717
##  - Log_b(x+a): 0.9727
##  - orderNorm (ORQ): 1.2335
##  - sqrt(x + a): 1.0367
##  - Yeo-Johnson: 1.0485
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Log_b(x + a) Transformation with 205 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - b = 10 
##  - mean (before standardization) = 0.1278904 
##  - sd (before standardization) = 0.0594089

# Fit a Yeo-Johnson transform, take its transformed values, then plot and test.
trait <- yeojohnson(x = mdata$V3)
trait2 <- predict(object = trait)
MASS::truehist(data = trait2, nbins = 12)

shapiro.test(x = trait2)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait2
## W = 0.9627, p-value = 3.117e-05

# Read TPA.TUE.10to14.C, plot the raw V3 histogram and rank candidate
# transforms. fill = TRUE lets read.table accept rows with missing fields;
# it is spelled out because `T` is an ordinary, reassignable variable.
mdata <- read.table("TPA.TUE.10to14.C.txt", fill = TRUE)
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 203 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.0542
##  - Box-Cox: 1.0367
##  - Center+scale: 1.2247
##  - Double Reversed Log_b(x+a): 1.4683
##  - Exp(x): 1.325
##  - Log_b(x+a): 1.0027
##  - orderNorm (ORQ): 1.2513
##  - sqrt(x + a): 1.0752
##  - Yeo-Johnson: 1.0542
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Log_b(x + a) Transformation with 203 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - b = 10 
##  - mean (before standardization) = 0.06645373 
##  - sd (before standardization) = 0.04613647

# asinh is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
trait <- asinh(mdata$V3)
MASS::truehist(trait, nbins = 12)

shapiro.test(trait)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait
## W = 0.99586, p-value = 0.8596

mdata$V3 <- trait
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.TUE.10to14.C.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Read TPA.TUE.10to14.S, plot the raw V3 histogram and rank candidate
# transforms. fill = TRUE lets read.table accept rows with missing fields;
# it is spelled out because `T` is an ordinary, reassignable variable.
mdata <- read.table("TPA.TUE.10to14.S.txt", fill = TRUE)
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.0612
##  - Box-Cox: 1.0607
##  - Center+scale: 1.0403
##  - Double Reversed Log_b(x+a): 1.9273
##  - Exp(x): 1.2765
##  - Log_b(x+a): 1.0262
##  - orderNorm (ORQ): 1.214
##  - sqrt(x + a): 1.0405
##  - Yeo-Johnson: 1.0773
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Log_b(x + a) Transformation with 205 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - b = 10 
##  - mean (before standardization) = 0.1294429 
##  - sd (before standardization) = 0.06306645

# asinh is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
trait <- asinh(mdata$V3)
MASS::truehist(trait, nbins = 12)

shapiro.test(trait)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait
## W = 0.99268, p-value = 0.4004

mdata$V3 <- trait
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.TUE.10to14.S.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Read TPA.TUE.p3top1.C, plot the raw V3 histogram and rank candidate
# transforms. fill = TRUE lets read.table accept rows with missing fields;
# it is spelled out because `T` is an ordinary, reassignable variable.
mdata <- read.table("TPA.TUE.p3top1.C.txt", fill = TRUE)
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 206 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.4478
##  - Box-Cox: 1.3993
##  - Center+scale: 1.5852
##  - Double Reversed Log_b(x+a): 2.6203
##  - Exp(x): 3.1595
##  - Log_b(x+a): 1.4197
##  - orderNorm (ORQ): 1.4953
##  - sqrt(x + a): 1.4973
##  - Yeo-Johnson: 1.3313
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 206 nonmissing obs.:
##  Estimated statistics:
##  - lambda = -4.99994 
##  - mean (before standardization) = 0.1977892 
##  - sd (before standardization) = 0.001138764

# Ordered-quantile (ORQ) normalisation of the trait.
# The earlier version passed the transformed values through
# predict(..., inverse = TRUE), which maps them straight back to the original
# scale -- so the histogram and the Shapiro-Wilk test were run on the
# untransformed data (the recorded W and p-value are identical to the raw-data
# test of this file near the top of the script). Test the transformed values.
trait <- mdata$V3
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# predict() with no newdata returns the ORQ-transformed training values.
x2 <- predict(trait2)
MASS::truehist(x2, nbins = 12)

shapiro.test(x2)

# NOTE(review): the result recorded below predates this fix (it reflects the
# back-transformed, i.e. original, values); re-run to refresh it.
## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.90186, p-value = 2.138e-10

# Read TPA.TUE.p3top1.S, plot the raw V3 histogram and rank candidate
# transforms. fill = TRUE lets read.table accept rows with missing fields;
# it is spelled out because `T` is an ordinary, reassignable variable.
mdata <- read.table("TPA.TUE.p3top1.S.txt", fill = TRUE)
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.6852
##  - Box-Cox: 1.1463
##  - Center+scale: 2.645
##  - Double Reversed Log_b(x+a): 1.5807
##  - Exp(x): 5.5337
##  - Log_b(x+a): 1.5113
##  - orderNorm (ORQ): 1.4788
##  - sqrt(x + a): 2.0463
##  - Yeo-Johnson: 1.1118
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 205 nonmissing obs.:
##  Estimated statistics:
##  - lambda = -4.999961 
##  - mean (before standardization) = 0.1981084 
##  - sd (before standardization) = 0.001064593

# Fit a Yeo-Johnson transform, take its transformed values, then plot and test.
trait <- yeojohnson(x = mdata$V3)
trait2 <- predict(object = trait)
MASS::truehist(data = trait2, nbins = 12)

shapiro.test(x = trait2)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait2
## W = 0.93787, p-value = 1.115e-07

# TPA END MEASUREMENTS ----

# Read TPA.SFM.C, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.SFM.C.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 206 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 0.9823
##  - Box-Cox: 1.014
##  - Center+scale: 0.9945
##  - Double Reversed Log_b(x+a): 1.7872
##  - Exp(x): 26.3313
##  - Log_b(x+a): 0.9823
##  - orderNorm (ORQ): 1.0437
##  - sqrt(x + a): 0.9688
##  - Yeo-Johnson: 1.014
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized sqrt(x + a) Transformation with 206 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - mean (before standardization) = 7.533917 
##  - sd (before standardization) = 1.011196

# asinh is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
trait <- asinh(mdata$V3)
MASS::truehist(trait, nbins = 12)

shapiro.test(trait)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait
## W = 0.9904, p-value = 0.1876

mdata$V3 <- trait
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.SFM.C.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Read TPA.SFM.S, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.SFM.S.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.1763
##  - Box-Cox: 1.258
##  - Center+scale: 1.5512
##  - Double Reversed Log_b(x+a): 1.9716
##  - Exp(x): 25.0615
##  - Log_b(x+a): 1.1697
##  - orderNorm (ORQ): 1.3828
##  - sqrt(x + a): 1.2642
##  - Yeo-Johnson: 1.2718
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Log_b(x + a) Transformation with 205 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - b = 10 
##  - mean (before standardization) = 1.462679 
##  - sd (before standardization) = 0.1220375

# asinh is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
trait <- asinh(mdata$V3)
MASS::truehist(trait, nbins = 12)

shapiro.test(trait)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait
## W = 0.99302, p-value = 0.4432

mdata$V3 <- trait
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.SFM.S.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Read TPA.SDM.S, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.SDM.S.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 0.9938
##  - Box-Cox: 1.0527
##  - Center+scale: 1.0548
##  - Double Reversed Log_b(x+a): 1.359
##  - Exp(x): 4.2078
##  - Log_b(x+a): 0.9845
##  - orderNorm (ORQ): 1.1877
##  - sqrt(x + a): 1.0262
##  - Yeo-Johnson: 1.0563
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Log_b(x + a) Transformation with 205 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - b = 10 
##  - mean (before standardization) = 0.4357593 
##  - sd (before standardization) = 0.1252154

# asinh is the transform kept for this trait: plot it, test it, then overwrite
# V3 and save the table tab-separated without header, row names or quotes.
trait <- asinh(mdata$V3)
MASS::truehist(trait, nbins = 12)

shapiro.test(trait)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait
## W = 0.99412, p-value = 0.6

mdata$V3 <- trait
# FALSE spelled out: `F` is an ordinary variable that can be reassigned.
write.table(
  mdata,
  file = "transf.TPA.SDM.S.txt",
  sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Read TPA.NaKratio.S, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.NaKratio.S.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.6587
##  - Box-Cox: 1.2888
##  - Center+scale: 1.8185
##  - Double Reversed Log_b(x+a): 2.0839
##  - Exp(x): 2.1348
##  - Log_b(x+a): 1.1862
##  - orderNorm (ORQ): 1.2868
##  - sqrt(x + a): 1.4583
##  - Yeo-Johnson: 1.187
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Log_b(x + a) Transformation with 205 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - b = 10 
##  - mean (before standardization) = -0.5318303 
##  - sd (before standardization) = 0.123506

# Ordered-quantile (ORQ) normalisation of the trait.
# The earlier version passed the transformed values through
# predict(..., inverse = TRUE), which maps them straight back to the original
# scale -- so the histogram and the Shapiro-Wilk test were run on the
# untransformed data. Test the transformed values instead.
trait <- mdata$V3
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# predict() with no newdata returns the ORQ-transformed training values.
x2 <- predict(trait2)
MASS::truehist(x2, nbins = 12)

shapiro.test(x2)

# NOTE(review): the result recorded below predates this fix (it reflects the
# back-transformed, i.e. original, values); re-run to refresh it.
## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.87524, p-value = 5.795e-12

# Read TPA.WaterNa.S, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.WaterNa.S.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.2725
##  - Box-Cox: 1.3258
##  - Center+scale: 1.4262
##  - Double Reversed Log_b(x+a): 1.8078
##  - Exp(x): 25.8745
##  - Log_b(x+a): 1.2725
##  - orderNorm (ORQ): 1.2223
##  - sqrt(x + a): 1.3707
##  - Yeo-Johnson: 1.3192
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 205 nonmissing obs and ties
##  - 204 unique values 
##  - Original quantiles:
##      0%     25%     50%     75%    100% 
##  26.270  51.960  58.107  66.590 111.290

# Ordered-quantile (ORQ) normalisation of the trait.
# The earlier version passed the transformed values through
# predict(..., inverse = TRUE), which maps them straight back to the original
# scale -- so the histogram and the Shapiro-Wilk test were run on the
# untransformed data (the recorded W and p-value are identical to the raw-data
# test of this file near the top of the script). Test the transformed values.
trait <- mdata$V3
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# predict() with no newdata returns the ORQ-transformed training values.
x2 <- predict(trait2)
MASS::truehist(x2, nbins = 12)

shapiro.test(x2)

# NOTE(review): the result recorded below predates this fix (it reflects the
# back-transformed, i.e. original, values); re-run to refresh it.
## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.97013, p-value = 0.00024

# Read TPA.TissueNa.S, plot the raw V3 histogram and rank candidate transforms.
mdata <- read.table(file = "TPA.TissueNa.S.txt")
MASS::truehist(data = mdata$V3, nbins = 12)

bestNormalize(x = mdata$V3)

## Best Normalizing transformation with 205 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.4817
##  - Box-Cox: 1.3977
##  - Center+scale: 1.6253
##  - Double Reversed Log_b(x+a): 2.3298
##  - Exp(x): 2.0135
##  - Log_b(x+a): 1.349
##  - orderNorm (ORQ): 1.2257
##  - sqrt(x + a): 1.4343
##  - Yeo-Johnson: 1.3588
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 205 nonmissing obs and ties
##  - 120 unique values 
##  - Original quantiles:
##    0%   25%   50%   75%  100% 
## 0.210 0.403 0.460 0.530 1.040

# Ordered-quantile (ORQ) normalisation of the trait.
# The earlier version passed the transformed values through
# predict(..., inverse = TRUE), which maps them straight back to the original
# scale -- so the histogram and the Shapiro-Wilk test were run on the
# untransformed data. Test the transformed values instead.
trait <- mdata$V3
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# predict() with no newdata returns the ORQ-transformed training values.
x2 <- predict(trait2)
MASS::truehist(x2, nbins = 12)

shapiro.test(x2)

# NOTE(review): the result recorded below predates this fix (it reflects the
# back-transformed, i.e. original, values); re-run to refresh it.
## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.9239, p-value = 8.196e-09

# AREA ----

# KAU.AREA.9.C: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.AREA.9.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 187 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.3225
##  - Box-Cox: 1.0666
##  - Center+scale: 1.3225
##  - Double Reversed Log_b(x+a): 1.8117
##  - Exp(x): 1.3188
##  - Log_b(x+a): 0.9553
##  - orderNorm (ORQ): 1.1956
##  - sqrt(x + a): 1.1176
##  - Yeo-Johnson: 1.2834
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Log_b(x + a) Transformation with 187 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - b = 10 
##  - mean (before standardization) = -1.618907 
##  - sd (before standardization) = 0.1126557

# Square-root transform of V3: histogram and Shapiro-Wilk normality test.
MASS::truehist(sqrt(mdata$V3), nbins = 12)

shapiro.test(sqrt(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(mdata$V3)
## W = 0.98893, p-value = 0.1545

# Keep the square-root values and save them under the "transf." prefix
# (tab-separated, no header, no row names).
mdata$V3 <- sqrt(mdata$V3)
write.table(mdata, file = "transf.KAU.AREA.9.C.txt", sep = "\t",
            quote = FALSE, row.names = FALSE, col.names = FALSE)

# KAU.AREA.16.C: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.AREA.16.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 190 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.2221
##  - Box-Cox: 1.0674
##  - Center+scale: 1.2221
##  - Double Reversed Log_b(x+a): 1.8524
##  - Exp(x): 1.2442
##  - Log_b(x+a): 1.0526
##  - orderNorm (ORQ): 1.1411
##  - sqrt(x + a): 1.0637
##  - Yeo-Johnson: 1.0342
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 190 nonmissing obs.:
##  Estimated statistics:
##  - lambda = -4.99994 
##  - mean (before standardization) = 0.04508165 
##  - sd (before standardization) = 0.01245384

# Shapiro-Wilk normality test on the square-root-transformed V3.
shapiro.test(sqrt(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(mdata$V3)
## W = 0.98345, p-value = 0.02431

# KAU.AREA.16.S: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.AREA.16.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 189 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.1159
##  - Box-Cox: 1.0396
##  - Center+scale: 1.1159
##  - Double Reversed Log_b(x+a): 1.5859
##  - Exp(x): 1.1343
##  - Log_b(x+a): 0.999
##  - orderNorm (ORQ): 1.1046
##  - sqrt(x + a): 1.0586
##  - Yeo-Johnson: 1.1044
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Log_b(x + a) Transformation with 189 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - b = 10 
##  - mean (before standardization) = -1.290742 
##  - sd (before standardization) = 0.1523645

# Shapiro-Wilk test on the base-10 log_x() transform of V3 (values returned
# by predict() on the fitted transformation).
shapiro.test(predict(log_x(mdata$V3, b = 10)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(log_x(mdata$V3, b = 10))
## W = 0.99364, p-value = 0.5916

# Keep the log-transformed values and save them under the "transf." prefix
# (tab-separated, no header, no row names).
mdata$V3 <- predict(log_x(mdata$V3, b = 10))
write.table(mdata, file = "transf.KAU.AREA.16.S.txt", sep = "\t",
            quote = FALSE, row.names = FALSE, col.names = FALSE)

# KAU.AREA.30.S: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.AREA.30.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 185 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.2773
##  - Box-Cox: 1.0999
##  - Center+scale: 1.2474
##  - Double Reversed Log_b(x+a): 2.1882
##  - Exp(x): 1.2388
##  - Log_b(x+a): 1.1961
##  - orderNorm (ORQ): 1.3983
##  - sqrt(x + a): 1.1148
##  - Yeo-Johnson: 1.1189
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Box Cox Transformation with 185 nonmissing obs.:
##  Estimated statistics:
##  - lambda = 0.4658924 
##  - mean (before standardization) = -1.084038 
##  - sd (before standardization) = 0.1678059

# Square-root transform of V3: histogram and Shapiro-Wilk normality test.
MASS::truehist(sqrt(mdata$V3), nbins = 12)

shapiro.test(sqrt(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(mdata$V3)
## W = 0.99635, p-value = 0.9388

# Keep the square-root values and save them under the "transf." prefix
# (tab-separated, no header, no row names).
mdata$V3 <- sqrt(mdata$V3)
write.table(mdata, file = "transf.KAU.AREA.30.S.txt", sep = "\t",
            quote = FALSE, row.names = FALSE, col.names = FALSE)

# KAU.AREA.36.S: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.AREA.36.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 187 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 0.9054
##  - Box-Cox: 0.9506
##  - Center+scale: 0.9506
##  - Double Reversed Log_b(x+a): 1.8366
##  - Exp(x): 1.0384
##  - Log_b(x+a): 1.1698
##  - orderNorm (ORQ): 1.0013
##  - sqrt(x + a): 0.9768
##  - Yeo-Johnson: 0.9547
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized asinh(x) Transformation with 187 nonmissing obs.:
##  Relevant statistics:
##  - mean (before standardization) = 0.2490761 
##  - sd (before standardization) = 0.07783096

# Square-root transform of V3: histogram and Shapiro-Wilk normality test.
MASS::truehist(sqrt(mdata$V3), nbins = 12)

shapiro.test(sqrt(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(mdata$V3)
## W = 0.99122, p-value = 0.3137

# Keep the square-root values and save them under the "transf." prefix
# (tab-separated, no header, no row names).
mdata$V3 <- sqrt(mdata$V3)
write.table(mdata, file = "transf.KAU.AREA.36.S.txt", sep = "\t",
            quote = FALSE, row.names = FALSE, col.names = FALSE)

# KAU.AREA.68.C: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.AREA.68.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 184 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.3097
##  - Box-Cox: 1.2604
##  - Center+scale: 1.3529
##  - Double Reversed Log_b(x+a): 1.5472
##  - Exp(x): 1.6456
##  - Log_b(x+a): 1.2917
##  - orderNorm (ORQ): 1.5123
##  - sqrt(x + a): 1.1914
##  - Yeo-Johnson: 1.2219
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized sqrt(x + a) Transformation with 184 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - mean (before standardization) = 0.6686603 
##  - sd (before standardization) = 0.1189378

# Square-root transform of V3: histogram and Shapiro-Wilk normality test.
MASS::truehist(sqrt(mdata$V3), nbins = 12)

shapiro.test(sqrt(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(mdata$V3)
## W = 0.99418, p-value = 0.6872

# Keep the square-root values and save them under the "transf." prefix
# (tab-separated, no header, no row names).
mdata$V3 <- sqrt(mdata$V3)
write.table(mdata, file = "transf.KAU.AREA.68.C.txt", sep = "\t",
            quote = FALSE, row.names = FALSE, col.names = FALSE)

# KAU.AREA.75.C: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.AREA.75.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 179 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.23
##  - Box-Cox: 1.02
##  - Center+scale: 1.2895
##  - Double Reversed Log_b(x+a): 2.4291
##  - Exp(x): 1.6168
##  - Log_b(x+a): 0.9962
##  - orderNorm (ORQ): 1.1961
##  - sqrt(x + a): 1.1017
##  - Yeo-Johnson: 1.0081
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Log_b(x + a) Transformation with 179 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - b = 10 
##  - mean (before standardization) = -0.3666267 
##  - sd (before standardization) = 0.1480625

# Square-root transform of V3: histogram and Shapiro-Wilk normality test.
MASS::truehist(sqrt(mdata$V3), nbins = 12)

shapiro.test(sqrt(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(mdata$V3)
## W = 0.99093, p-value = 0.319

# Keep the square-root values and save them under the "transf." prefix
# (tab-separated, no header, no row names).
mdata$V3 <- sqrt(mdata$V3)
write.table(mdata, file = "transf.KAU.AREA.75.C.txt", sep = "\t",
            quote = FALSE, row.names = FALSE, col.names = FALSE)

# MRENDVI ----

# KAU.MRENDVI.9.S: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.MRENDVI.9.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 96 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.1622
##  - Box-Cox: 1.1764
##  - Center+scale: 1.0316
##  - Double Reversed Log_b(x+a): 1.0176
##  - Exp(x): 1.2098
##  - Log_b(x+a): 1.6156
##  - orderNorm (ORQ): 1.3413
##  - sqrt(x + a): 1.2391
##  - Yeo-Johnson: 1.1636
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized double reversed Log_b(x + a) Transformation with 96 nonmissing obs.:
##  Relevant statistics:
##  - a = 
##  - b = 10 
##  - max(x) = 1.882206 ; min(x) = -0.07033806 
##  - mean (before standardization) = 0.2786437 
##  - sd (before standardization) = 0.1544091

# Box-Cox transformation of V3; trait2 holds the transformed values that are
# plotted and tested below.
trait <- boxcox(mdata$V3)
trait2 <- predict(trait)
MASS::truehist(trait2, nbins = 12)

shapiro.test(trait2)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait2
## W = 0.98991, p-value = 0.6845

# Store the Box-Cox values that were just tested. The previous code stored
# sqrt(trait2), which turned the negative transformed values into NaN, and
# wrote the result over the input file "KAU.MRENDVI.9.S.txt". Write to a
# "transf." file like the other sections so the source data is never clobbered.
# NOTE(review): only 96 observations were reported for this file versus
# roughly 180-190 for its siblings; the input may already have been damaged
# by an earlier run -- restore it from the source data and confirm.
mdata$V3 <- trait2
write.table(mdata, file = "transf.KAU.MRENDVI.9.S.txt", sep = "\t",
            quote = FALSE, row.names = FALSE, col.names = FALSE)

# KAU.MRENDVI.16.C: read the trait table, plot a 12-bin histogram of column
# V3, and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.MRENDVI.16.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 190 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.2663
##  - Box-Cox: 1.1558
##  - Center+scale: 1.2442
##  - Double Reversed Log_b(x+a): 1.0489
##  - Exp(x): 1.2479
##  - Log_b(x+a): 1.4137
##  - orderNorm (ORQ): 1.27
##  - sqrt(x + a): 1.3216
##  - Yeo-Johnson: 1.1558
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized double reversed Log_b(x + a) Transformation with 190 nonmissing obs.:
##  Relevant statistics:
##  - a = 
##  - b = 10 
##  - max(x) = 0.5353393 ; min(x) = 0.3061361 
##  - mean (before standardization) = 0.501834 
##  - sd (before standardization) = 0.157437

# Fit a Yeo-Johnson transformation to V3, then plot and test the transformed
# values returned by predict().
trait <- yeojohnson(x = mdata$V3)
trait2 <- predict(object = trait)
MASS::truehist(data = trait2, nbins = 12)

shapiro.test(x = trait2)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait2
## W = 0.88519, p-value = 6.963e-11

# KAU.MRENDVI.16.S: read the trait table, plot a 12-bin histogram of column
# V3, and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.MRENDVI.16.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 189 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.4219
##  - Box-Cox: 1.3003
##  - Center+scale: 1.3961
##  - Double Reversed Log_b(x+a): 1.4598
##  - Exp(x): 1.3333
##  - Log_b(x+a): 1.5996
##  - orderNorm (ORQ): 1.6458
##  - sqrt(x + a): 1.4411
##  - Yeo-Johnson: 1.323
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Box Cox Transformation with 189 nonmissing obs.:
##  Estimated statistics:
##  - lambda = 1.999958 
##  - mean (before standardization) = -0.4093739 
##  - sd (before standardization) = 0.01367975

# Box-Cox transformation of V3; trait2 holds the transformed values that are
# plotted and tested below.
trait <- boxcox(mdata$V3)
trait2 <- predict(trait)
MASS::truehist(trait2, nbins = 12)

shapiro.test(trait2)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait2
## W = 0.9859, p-value = 0.05552

# Keep the Box-Cox values and save them under the "transf." prefix
# (tab-separated, no header, no row names).
mdata$V3 <- trait2
write.table(mdata, file = "transf.KAU.MRENDVI.16.S.txt", sep = "\t",
            quote = FALSE, row.names = FALSE, col.names = FALSE)

# KAU.MRENDVI.30.C: read the trait table, plot a 12-bin histogram of column
# V3, and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.MRENDVI.30.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 189 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.4053
##  - Box-Cox: 1.313
##  - Center+scale: 1.3943
##  - Double Reversed Log_b(x+a): 1.3726
##  - Exp(x): 1.3652
##  - Log_b(x+a): 1.4786
##  - orderNorm (ORQ): 1.33
##  - sqrt(x + a): 1.4702
##  - Yeo-Johnson: 1.287
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 189 nonmissing obs.:
##  Estimated statistics:
##  - lambda = -4.99994 
##  - mean (before standardization) = 0.1739921 
##  - sd (before standardization) = 0.003114791

# Ordered quantile (ORQ) normalization of V3.
trait <- unlist(mdata$V3)
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# predict() without newdata returns the transformed values of the fitted data.
p <- predict(trait2)
# inverse = TRUE maps p back to the original scale, so x2 reproduces the raw
# data (round-trip check only); plot and test the transformed values p.
x2 <- predict(trait2, newdata = p, inverse = TRUE)
MASS::truehist(p, nbins = 12)

# NOTE: the Shapiro-Wilk output recorded after this chunk was computed on x2
# (the untransformed data) and must be regenerated.
shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.95194, p-value = 5.248e-06

# KAU.MRENDVI.30.S: read the trait table, plot a 12-bin histogram of column
# V3, and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.MRENDVI.30.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 185 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.2372
##  - Box-Cox: 1.185
##  - Center+scale: 1.2593
##  - Double Reversed Log_b(x+a): 1.3199
##  - Exp(x): 1.2077
##  - Log_b(x+a): 1.3189
##  - orderNorm (ORQ): 1.3442
##  - sqrt(x + a): 1.2722
##  - Yeo-Johnson: 1.1768
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 185 nonmissing obs.:
##  Estimated statistics:
##  - lambda = -4.99994 
##  - mean (before standardization) = 0.1745987 
##  - sd (before standardization) = 0.002853297

# Box-Cox transformation of V3; trait2 holds the transformed values that are
# plotted and tested below.
trait <- boxcox(mdata$V3)
trait2 <- predict(trait)
MASS::truehist(trait2, nbins = 12)

shapiro.test(trait2)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait2
## W = 0.991, p-value = 0.3014

# Keep the Box-Cox values and save them under the "transf." prefix
# (tab-separated, no header, no row names).
mdata$V3 <- trait2
write.table(mdata, file = "transf.KAU.MRENDVI.30.S.txt", sep = "\t",
            quote = FALSE, row.names = FALSE, col.names = FALSE)

# KAU.MRENDVI.36.C: read the trait table, plot a 12-bin histogram of column
# V3, and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.MRENDVI.36.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 188 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.6282
##  - Box-Cox: 1.4551
##  - Center+scale: 1.5832
##  - Double Reversed Log_b(x+a): 1.251
##  - Exp(x): 1.5529
##  - Log_b(x+a): 1.8335
##  - orderNorm (ORQ): 1.2279
##  - sqrt(x + a): 1.7265
##  - Yeo-Johnson: 1.4668
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 188 nonmissing obs and ties
##  - 189 unique values 
##  - Original quantiles:
##    0%   25%   50%   75%  100% 
## 0.281 0.480 0.509 0.528 0.585

# Ordered quantile (ORQ) normalization of V3.
trait <- unlist(mdata$V3)
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# predict() without newdata returns the transformed values of the fitted data.
p <- predict(trait2)
# inverse = TRUE maps p back to the original scale, so x2 reproduces the raw
# data (round-trip check only); plot and test the transformed values p.
x2 <- predict(trait2, newdata = p, inverse = TRUE)
MASS::truehist(p, nbins = 12)

# NOTE: the Shapiro-Wilk output recorded after this chunk was computed on x2
# (the untransformed data) and must be regenerated.
shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.89742, p-value = 4.287e-10

# KAU.MRENDVI.36.S: read the trait table, plot a 12-bin histogram of column
# V3, and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.MRENDVI.36.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 187 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.6954
##  - Box-Cox: 1.4801
##  - Center+scale: 1.6649
##  - Double Reversed Log_b(x+a): 1.3014
##  - Exp(x): 1.6074
##  - Log_b(x+a): 1.8047
##  - orderNorm (ORQ): 1.4039
##  - sqrt(x + a): 1.7468
##  - Yeo-Johnson: 1.4361
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized double reversed Log_b(x + a) Transformation with 187 nonmissing obs.:
##  Relevant statistics:
##  - a = 
##  - b = 10 
##  - max(x) = 0.600436 ; min(x) = 0.2187676 
##  - mean (before standardization) = 0.4987305 
##  - sd (before standardization) = 0.1455551

# Fit a Box-Cox transformation to V3, then plot and test the transformed
# values returned by predict().
trait <- boxcox(x = mdata$V3)
trait2 <- predict(object = trait)
MASS::truehist(data = trait2, nbins = 12)

shapiro.test(x = trait2)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait2
## W = 0.97807, p-value = 0.004877

# KAU.MRENDVI.68.C: read the trait table, plot a 12-bin histogram of column
# V3, and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.MRENDVI.68.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 184 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.7661
##  - Box-Cox: 1.439
##  - Center+scale: 1.6693
##  - Double Reversed Log_b(x+a): 1.2481
##  - Exp(x): 1.5418
##  - Log_b(x+a): 2.3511
##  - orderNorm (ORQ): 1.43
##  - sqrt(x + a): 1.9769
##  - Yeo-Johnson: 1.325
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized double reversed Log_b(x + a) Transformation with 184 nonmissing obs.:
##  Relevant statistics:
##  - a = 
##  - b = 10 
##  - max(x) = 0.6284347 ; min(x) = 0.1638 
##  - mean (before standardization) = 0.4486253 
##  - sd (before standardization) = 0.1972762

# Ordered quantile (ORQ) normalization of V3.
trait <- unlist(mdata$V3)
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# predict() without newdata returns the transformed values of the fitted data.
p <- predict(trait2)
# inverse = TRUE maps p back to the original scale, so x2 reproduces the raw
# data (round-trip check only); plot and test the transformed values p.
x2 <- predict(trait2, newdata = p, inverse = TRUE)
MASS::truehist(p, nbins = 12)

# NOTE: the Shapiro-Wilk output recorded after this chunk was computed on x2
# (the untransformed data) and must be regenerated.
shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.93713, p-value = 3.476e-07

# KAU.MRENDVI.68.S: read the trait table, plot a 12-bin histogram of column
# V3, and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.MRENDVI.68.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 185 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.2092
##  - Box-Cox: 1.0589
##  - Center+scale: 1.1437
##  - Double Reversed Log_b(x+a): 1.3508
##  - Exp(x): 1.0814
##  - Log_b(x+a): 1.4679
##  - orderNorm (ORQ): 1.1688
##  - sqrt(x + a): 1.1803
##  - Yeo-Johnson: 1.0886
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Box Cox Transformation with 185 nonmissing obs.:
##  Estimated statistics:
##  - lambda = 1.712865 
##  - mean (before standardization) = -0.4898615 
##  - sd (before standardization) = 0.02945206

# Yeo-Johnson transformation of V3.
trait <- mdata$V3
trait2 <- yeojohnson(trait)
# predict() without newdata returns the transformed values of the fitted data.
p <- predict(trait2)
# inverse = TRUE maps p back to the original scale, so x2 reproduces the raw
# data (round-trip check only); plot and test the transformed values p.
x2 <- predict(trait2, newdata = p, inverse = TRUE)
MASS::truehist(p, nbins = 12)

# NOTE: the Shapiro-Wilk output recorded after this chunk was computed on x2
# (the untransformed data) and must be regenerated.
shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.97702, p-value = 0.003814

# KAU.MRENDVI.75.C: read the trait table, plot a 12-bin histogram of column
# V3, and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.MRENDVI.75.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 179 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.3631
##  - Box-Cox: 1.2408
##  - Center+scale: 1.3048
##  - Double Reversed Log_b(x+a): 1.2813
##  - Exp(x): 1.2918
##  - Log_b(x+a): 1.806
##  - orderNorm (ORQ): 1.2046
##  - sqrt(x + a): 1.5503
##  - Yeo-Johnson: 1.198
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 179 nonmissing obs.:
##  Estimated statistics:
##  - lambda = -4.99994 
##  - mean (before standardization) = 0.1602267 
##  - sd (before standardization) = 0.01161967

# Ordered quantile (ORQ) normalization of V3.
trait <- unlist(mdata$V3)
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# predict() without newdata returns the transformed values of the fitted data.
p <- predict(trait2)
# inverse = TRUE maps p back to the original scale, so x2 reproduces the raw
# data (round-trip check only); plot and test the transformed values p.
x2 <- predict(trait2, newdata = p, inverse = TRUE)
MASS::truehist(p, nbins = 12)

# NOTE: the Shapiro-Wilk output recorded after this chunk was computed on x2
# (the untransformed data) and must be regenerated.
shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.96053, p-value = 6.256e-05

# PRI ----

# KAU.PRI.16.C: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.PRI.16.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 190 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.48
##  - Center+scale: 1.48
##  - Double Reversed Log_b(x+a): 1.3695
##  - Exp(x): 1.5168
##  - Log_b(x+a): 1.8905
##  - orderNorm (ORQ): 1.3805
##  - sqrt(x + a): 1.6357
##  - Yeo-Johnson: 1.5205
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized double reversed Log_b(x + a) Transformation with 190 nonmissing obs.:
##  Relevant statistics:
##  - a = 
##  - b = 10 
##  - max(x) = -0.00252848 ; min(x) = -0.07383932 
##  - mean (before standardization) = 0.4322043 
##  - sd (before standardization) = 0.1745917

# Inverse hyperbolic sine of V3, followed by its histogram and a
# Shapiro-Wilk normality test.
trait <- asinh(x = mdata$V3)
MASS::truehist(data = trait, nbins = 12)

shapiro.test(x = trait)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait
## W = 0.97788, p-value = 0.004178

# KAU.PRI.16.S: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.PRI.16.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 189 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.1634
##  - Center+scale: 1.1634
##  - Double Reversed Log_b(x+a): 1.4434
##  - Exp(x): 1.1781
##  - Log_b(x+a): 1.5804
##  - orderNorm (ORQ): 1.285
##  - sqrt(x + a): 1.1978
##  - Yeo-Johnson: 1.156
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 189 nonmissing obs.:
##  Estimated statistics:
##  - lambda = 4.99994 
##  - mean (before standardization) = -0.03442627 
##  - sd (before standardization) = 0.004887412

# Yeo-Johnson transformation of V3.
trait <- mdata$V3
trait2 <- yeojohnson(trait)
# predict() without newdata returns the transformed values of the fitted data.
p <- predict(trait2)
# inverse = TRUE maps p back to the original scale, so x2 reproduces the raw
# data (round-trip check only); plot, test and save the transformed values p.
x2 <- predict(trait2, newdata = p, inverse = TRUE)
MASS::truehist(p, nbins = 12)

shapiro.test(p)

# NOTE: the output below was recorded when the test was still run on x2 (the
# untransformed data); regenerate it.
## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.99705, p-value = 0.9769

# Save the Yeo-Johnson values (previously the back-transformed raw data was
# written to the "transf." file).
mdata$V3 <- p
write.table(mdata, file = "transf.KAU.PRI.16.S.txt", sep = "\t",
            quote = FALSE, row.names = FALSE, col.names = FALSE)

# KAU.PRI.30.C: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.PRI.30.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 189 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.4016
##  - Center+scale: 1.4016
##  - Double Reversed Log_b(x+a): 1.4121
##  - Exp(x): 1.3537
##  - Log_b(x+a): 1.6959
##  - orderNorm (ORQ): 1.3192
##  - sqrt(x + a): 1.5319
##  - Yeo-Johnson: 1.3228
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 189 nonmissing obs and ties
##  - 190 unique values 
##  - Original quantiles:
##     0%    25%    50%    75%   100% 
## -0.062 -0.028 -0.023 -0.018 -0.004

# Ordered quantile (ORQ) normalization of V3.
trait <- unlist(mdata$V3)
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# predict() without newdata returns the transformed values of the fitted data.
p <- predict(trait2)
# inverse = TRUE maps p back to the original scale, so x2 reproduces the raw
# data (round-trip check only); plot and test the transformed values p.
x2 <- predict(trait2, newdata = p, inverse = TRUE)
MASS::truehist(p, nbins = 12)

# NOTE: the Shapiro-Wilk output recorded after this chunk was computed on x2
# (the untransformed data) and must be regenerated.
shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.95449, p-value = 9.276e-06

# KAU.PRI.30.S: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.PRI.30.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 185 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.0507
##  - Center+scale: 1.0507
##  - Double Reversed Log_b(x+a): 1.5663
##  - Exp(x): 1.0728
##  - Log_b(x+a): 1.5075
##  - orderNorm (ORQ): 1.1954
##  - sqrt(x + a): 1.0503
##  - Yeo-Johnson: 1.1187
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized sqrt(x + a) Transformation with 185 nonmissing obs.:
##  Relevant statistics:
##  - a = 0.05235165 
##  - mean (before standardization) = 0.1474897 
##  - sd (before standardization) = 0.0306786

# Shapiro-Wilk normality test on the exponentiated V3.
shapiro.test(exp(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  exp(mdata$V3)
## W = 0.99533, p-value = 0.8378

# Keep the exponentiated values and save them under the "transf." prefix
# (tab-separated, no header, no row names).
mdata$V3 <- exp(mdata$V3)
write.table(mdata, file = "transf.KAU.PRI.30.S.txt", sep = "\t",
            quote = FALSE, row.names = FALSE, col.names = FALSE)

# KAU.PRI.36.C: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.PRI.36.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 188 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.022
##  - Center+scale: 1.022
##  - Double Reversed Log_b(x+a): 1.1518
##  - Exp(x): 1.0189
##  - Log_b(x+a): 1.4897
##  - orderNorm (ORQ): 1.1215
##  - sqrt(x + a): 1.1237
##  - Yeo-Johnson: 1.0011
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 188 nonmissing obs.:
##  Estimated statistics:
##  - lambda = 4.99994 
##  - mean (before standardization) = -0.03839941 
##  - sd (before standardization) = 0.007787223

# Shapiro-Wilk normality test on the exponentiated V3.
shapiro.test(exp(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  exp(mdata$V3)
## W = 0.95895, p-value = 2.747e-05

# KAU.PRI.36.S: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.PRI.36.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 187 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.0243
##  - Center+scale: 1.0243
##  - Double Reversed Log_b(x+a): 1.0208
##  - Exp(x): 1.0132
##  - Log_b(x+a): 1.2442
##  - orderNorm (ORQ): 1.1008
##  - sqrt(x + a): 0.9708
##  - Yeo-Johnson: 0.9991
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized sqrt(x + a) Transformation with 187 nonmissing obs.:
##  Relevant statistics:
##  - a = 0.08599875 
##  - mean (before standardization) = 0.1974982 
##  - sd (before standardization) = 0.0255357

# Yeo-Johnson transformation of V3.
trait <- mdata$V3
trait2 <- yeojohnson(trait)
# predict() without newdata returns the transformed values of the fitted data.
p <- predict(trait2)
# inverse = TRUE maps p back to the original scale, so x2 reproduces the raw
# data (round-trip check only); plot and test the transformed values p.
x2 <- predict(trait2, newdata = p, inverse = TRUE)
MASS::truehist(p, nbins = 12)

# NOTE: the Shapiro-Wilk output recorded after this chunk was computed on x2
# (the untransformed data) and must be regenerated.
shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.97585, p-value = 0.002515

# KAU.PRI.69.C: read the trait table (fill = TRUE pads rows that have fewer
# fields than the others), plot a 12-bin histogram of column V3, and let
# bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.PRI.69.C.txt", fill = TRUE)
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 184 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.3979
##  - Center+scale: 1.3979
##  - Double Reversed Log_b(x+a): 1.1594
##  - Exp(x): 1.3979
##  - Log_b(x+a): 2.3806
##  - orderNorm (ORQ): 1.1144
##  - sqrt(x + a): 1.8082
##  - Yeo-Johnson: 1.2919
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 184 nonmissing obs and ties
##  - 185 unique values 
##  - Original quantiles:
##     0%    25%    50%    75%   100% 
## -0.122 -0.079 -0.064 -0.054 -0.033

# Shapiro-Wilk normality test on the base-10 log_x() transform of V3
# (values returned by predict() on the fitted transformation).
shapiro.test(predict(log_x(mdata$V3, b=10)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(log_x(mdata$V3, b = 10))
## W = 0.68588, p-value < 2.2e-16

# KAU.PRI.75.C: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.PRI.75.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 179 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.5304
##  - Center+scale: 1.5342
##  - Double Reversed Log_b(x+a): 1.2323
##  - Exp(x): 1.507
##  - Log_b(x+a): 2.7956
##  - orderNorm (ORQ): 1.2327
##  - sqrt(x + a): 2.0618
##  - Yeo-Johnson: 1.4009
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized double reversed Log_b(x + a) Transformation with 179 nonmissing obs.:
##  Relevant statistics:
##  - a = 
##  - b = 10 
##  - max(x) = -0.02814495 ; min(x) = -0.1267321 
##  - mean (before standardization) = 0.4091387 
##  - sd (before standardization) = 0.1711149

# Shapiro-Wilk normality test on the base-10 log_x() transform of V3
# (values returned by predict() on the fitted transformation).
shapiro.test(predict(log_x(mdata$V3, b=10)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(log_x(mdata$V3, b = 10))
## W = 0.6885, p-value < 2.2e-16

# KAU.PRI.75.S: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.PRI.75.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 171 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.1075
##  - Center+scale: 1.1075
##  - Double Reversed Log_b(x+a): 1.4067
##  - Exp(x): 1.0869
##  - Log_b(x+a): 1.7937
##  - orderNorm (ORQ): 1.1722
##  - sqrt(x + a): 1.3015
##  - Yeo-Johnson: 1.0665
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 171 nonmissing obs.:
##  Estimated statistics:
##  - lambda = 4.99994 
##  - mean (before standardization) = -0.07442292 
##  - sd (before standardization) = 0.007287169

# Yeo-Johnson transformation of V3.
trait <- mdata$V3
trait2 <- yeojohnson(trait)
# predict() without newdata returns the transformed values of the fitted data.
p <- predict(trait2)
# inverse = TRUE maps p back to the original scale, so x2 reproduces the raw
# data (round-trip check only); plot, test and save the transformed values p.
x2 <- predict(trait2, newdata = p, inverse = TRUE)
MASS::truehist(p, nbins = 12)

shapiro.test(p)

# NOTE: the output below was recorded when the test was still run on x2 (the
# untransformed data); regenerate it.
## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.99054, p-value = 0.3171

# Save the Yeo-Johnson values (previously the back-transformed raw data was
# written to the "transf." file).
mdata$V3 <- p
write.table(mdata, file = "transf.KAU.PRI.75.S.txt", sep = "\t",
            quote = FALSE, row.names = FALSE, col.names = FALSE)

# WBI ----

# KAU.WBI.9.C: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.WBI.9.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 187 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 2.3322
##  - Box-Cox: 2.1756
##  - Center+scale: 2.2704
##  - Double Reversed Log_b(x+a): 1.4895
##  - Exp(x): 2.183
##  - Log_b(x+a): 2.3733
##  - orderNorm (ORQ): 1.2371
##  - sqrt(x + a): 2.3322
##  - Yeo-Johnson: 2.0937
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 187 nonmissing obs and ties
##  - 188 unique values 
##  - Original quantiles:
##    0%   25%   50%   75%  100% 
## 0.915 1.004 1.011 1.019 1.059

# Ordered quantile (ORQ) normalization of V3.
trait <- unlist(mdata$V3)
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# predict() without newdata returns the transformed values of the fitted data.
p <- predict(trait2)
# inverse = TRUE maps p back to the original scale, so x2 reproduces the raw
# data (round-trip check only); plot and test the transformed values p.
x2 <- predict(trait2, newdata = p, inverse = TRUE)
MASS::truehist(p, nbins = 12)

# NOTE: the Shapiro-Wilk output recorded after this chunk was computed on x2
# (the untransformed data) and must be regenerated.
shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.73785, p-value < 2.2e-16

# KAU.WBI.9.S: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.WBI.9.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 180 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 2.4572
##  - Box-Cox: 2.2472
##  - Center+scale: 2.3639
##  - Double Reversed Log_b(x+a): 1.4617
##  - Exp(x): 2.255
##  - Log_b(x+a): 2.5739
##  - orderNorm (ORQ): 1.0028
##  - sqrt(x + a): 2.5
##  - Yeo-Johnson: 2.1267
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 180 nonmissing obs and ties
##  - 181 unique values 
##  - Original quantiles:
##    0%   25%   50%   75%  100% 
## 0.864 0.968 0.980 0.989 1.016

# Ordered quantile (ORQ) normalization of V3.
trait <- unlist(mdata$V3)
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# predict() without newdata returns the transformed values of the fitted data.
p <- predict(trait2)
# inverse = TRUE maps p back to the original scale, so x2 reproduces the raw
# data (round-trip check only); plot and test the transformed values p.
x2 <- predict(trait2, newdata = p, inverse = TRUE)
MASS::truehist(p, nbins = 12)

# NOTE: the Shapiro-Wilk output recorded after this chunk was computed on x2
# (the untransformed data) and must be regenerated.
shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.75297, p-value = 4.3e-16

# KAU.WBI.16.C: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.WBI.16.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 190 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.4026
##  - Box-Cox: 1.4284
##  - Center+scale: 1.3732
##  - Double Reversed Log_b(x+a): 1.1892
##  - Exp(x): 1.3732
##  - Log_b(x+a): 1.3953
##  - orderNorm (ORQ): 1.2442
##  - sqrt(x + a): 1.4026
##  - Yeo-Johnson: 1.4505
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized double reversed Log_b(x + a) Transformation with 190 nonmissing obs.:
##  Relevant statistics:
##  - a = 
##  - b = 10 
##  - max(x) = 1.104949 ; min(x) = 1.00036 
##  - mean (before standardization) = 0.2380877 
##  - sd (before standardization) = 0.09837224

# Shapiro-Wilk normality test on the base-10 log_x() transform of V3
# (values returned by predict() on the fitted transformation).
shapiro.test(predict(log_x(mdata$V3, b=10)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(log_x(mdata$V3, b = 10))
## W = 0.96142, p-value = 4.534e-05

# KAU.WBI.16.S: read the trait table, plot a 12-bin histogram of column V3,
# and let bestNormalize() compare candidate normalizing transformations.
mdata <- read.table("KAU.WBI.16.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 189 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 9.9132
##  - Box-Cox: 9.9132
##  - Center+scale: 9.9132
##  - Double Reversed Log_b(x+a): 8.5197
##  - Exp(x): 9.9132
##  - Log_b(x+a): 9.9132
##  - orderNorm (ORQ): 6.8502
##  - sqrt(x + a): 9.9132
##  - Yeo-Johnson: 9.9132
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 189 nonmissing obs and ties
##  - 16 unique values 
##  - Original quantiles:
##    0%   25%   50%   75%  100% 
## 1.032 1.032 1.032 1.032 1.032

# Fit the ordered-quantile (ORQ) normalization that bestNormalize() selected.
trait <- mdata$V3
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# predict() without newdata returns the ORQ-transformed values. The previous
# version fed them back through predict(..., inverse = TRUE), which recovers
# the raw data, so the histogram and Shapiro-Wilk test described the
# untransformed trait. Assess the transformed values instead.
# NOTE: the output recorded after this chunk predates this fix; re-knit to
# refresh it.
p <- predict(trait2)
MASS::truehist(p, nbins = 12)

shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.40837, p-value < 2.2e-16

# KAU.WBI.30.C: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations. The recorded run reports 10 unique values
# among 189 observations and picked orderNorm.
mdata <- read.table("KAU.WBI.30.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 189 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 8.3681
##  - Box-Cox: 8.3681
##  - Center+scale: 8.3681
##  - Double Reversed Log_b(x+a): 5.259
##  - Exp(x): 8.3681
##  - Log_b(x+a): 8.3681
##  - orderNorm (ORQ): 5.2158
##  - sqrt(x + a): 8.3681
##  - Yeo-Johnson: 8.3681
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 189 nonmissing obs and ties
##  - 10 unique values 
##  - Original quantiles:
##    0%   25%   50%   75%  100% 
## 0.997 0.997 0.997 0.997 0.997

# Tests the log10 transform rather than the orderNorm that bestNormalize()
# picked. Recorded result: W = 0.38774, p-value < 2.2e-16; nothing is written.
shapiro.test(predict(log_x(mdata$V3, b=10)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(log_x(mdata$V3, b = 10))
## W = 0.38774, p-value < 2.2e-16

# KAU.WBI.30.S: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations. The recorded run reports 19 unique values
# among 185 observations and picked orderNorm.
mdata <- read.table("KAU.WBI.30.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 185 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 7.0797
##  - Box-Cox: 7.0797
##  - Center+scale: 7.0797
##  - Double Reversed Log_b(x+a): 4.2654
##  - Exp(x): 7.0797
##  - Log_b(x+a): 7.0797
##  - orderNorm (ORQ): 3.0263
##  - sqrt(x + a): 7.0797
##  - Yeo-Johnson: 7.0797
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 185 nonmissing obs and ties
##  - 19 unique values 
##  - Original quantiles:
##    0%   25%   50%   75%  100% 
## 1.021 1.021 1.021 1.021 1.021

# Fit the ordered-quantile (ORQ) normalization that bestNormalize() selected.
trait <- mdata$V3
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# Evaluate the ORQ-transformed values. Previously they were inverted back with
# predict(..., inverse = TRUE) before plotting and testing, which reproduces
# the raw trait and therefore never assessed the transformation itself.
# NOTE: the output recorded after this chunk predates this fix; re-knit to
# refresh it.
p <- predict(trait2)
MASS::truehist(p, nbins = 12)

shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.45538, p-value < 2.2e-16

# KAU.WBI.36.C: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked Log_b(x + a).
mdata <- read.table("KAU.WBI.36.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 188 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.0525
##  - Box-Cox: 1.0893
##  - Center+scale: 1.0746
##  - Double Reversed Log_b(x+a): 1.258
##  - Exp(x): 1.0893
##  - Log_b(x+a): 1.0341
##  - orderNorm (ORQ): 1.1331
##  - sqrt(x + a): 1.0525
##  - Yeo-Johnson: 1.0525
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Log_b(x + a) Transformation with 188 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - b = 10 
##  - mean (before standardization) = 0.02196259 
##  - sd (before standardization) = 0.005604793

# Try Yeo-Johnson instead of the log transform bestNormalize() picked.
# Here `trait` is the fitted transformation object and `trait2` the
# transformed values (the orderNorm chunks use these two names the other way
# round). Recorded Shapiro-Wilk p-value: 0.0009385; nothing is written out.
trait <- yeojohnson(mdata$V3)
trait2 <- predict(trait)
MASS::truehist(trait2, nbins = 12)

shapiro.test(trait2)

## 
##  Shapiro-Wilk normality test
## 
## data:  trait2
## W = 0.97256, p-value = 0.0009385

# KAU.WBI.36.S: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked the double reversed
# log.
mdata <- read.table("KAU.WBI.36.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 187 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.4164
##  - Box-Cox: 1.3716
##  - Center+scale: 1.3906
##  - Double Reversed Log_b(x+a): 1.1358
##  - Exp(x): 1.3755
##  - Log_b(x+a): 1.451
##  - orderNorm (ORQ): 1.2869
##  - sqrt(x + a): 1.4164
##  - Yeo-Johnson: 1.3755
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized double reversed Log_b(x + a) Transformation with 187 nonmissing obs.:
##  Relevant statistics:
##  - a = 
##  - b = 10 
##  - max(x) = 1.059804 ; min(x) = 1.004875 
##  - mean (before standardization) = 0.4081525 
##  - sd (before standardization) = 0.1549528

# Tests plain log10 (log_x), not the double reversed log that was ranked best.
# Recorded p-value: 2.875e-05; no transformed file is written.
shapiro.test(predict(log_x(mdata$V3, b=10)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(log_x(mdata$V3, b = 10))
## W = 0.95893, p-value = 2.875e-05

# KAU.WBI.68.C: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked the double reversed
# log.
mdata <- read.table("KAU.WBI.68.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 184 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.6429
##  - Box-Cox: 1.5549
##  - Center+scale: 1.6472
##  - Double Reversed Log_b(x+a): 1.2448
##  - Exp(x): 1.5627
##  - Log_b(x+a): 1.6617
##  - orderNorm (ORQ): 1.3189
##  - sqrt(x + a): 1.6429
##  - Yeo-Johnson: 1.4828
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized double reversed Log_b(x + a) Transformation with 184 nonmissing obs.:
##  Relevant statistics:
##  - a = 
##  - b = 10 
##  - max(x) = 1.113899 ; min(x) = 0.9593143 
##  - mean (before standardization) = 0.4161042 
##  - sd (before standardization) = 0.1717343

# Tests plain log10 (log_x), not the double reversed log that was ranked best.
# Recorded p-value: 2.484e-07; no transformed file is written.
shapiro.test(predict(log_x(mdata$V3, b=10)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(log_x(mdata$V3, b = 10))
## W = 0.93533, p-value = 2.484e-07

# KAU.WBI.68.S: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked center_scale.
mdata <- read.table("KAU.WBI.68.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 185 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.0125
##  - Box-Cox: 1.0399
##  - Center+scale: 1.0022
##  - Double Reversed Log_b(x+a): 1.2527
##  - Exp(x): 1.0477
##  - Log_b(x+a): 1.0198
##  - orderNorm (ORQ): 1.1832
##  - sqrt(x + a): 1.0125
##  - Yeo-Johnson: 1.0223
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## center_scale(x) Transformation with 185 nonmissing obs.
##  Estimated statistics:
##  - mean (before standardization) = 1.02529 
##  - sd (before standardization) = 0.0219166

# Tests the log10 transform (log_x) rather than the center_scale pick.
# Recorded p-value: 0.05808; this transform is the one written out below.
shapiro.test(predict(log_x(mdata$V3, b=10)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(log_x(mdata$V3, b = 10))
## W = 0.98576, p-value = 0.05808

# Replace V3 with the standardized log10 transform tested above and save the
# table tab-separated, without header, row names or quoting.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
mdata$V3 <- predict(log_x(mdata$V3, b = 10))
write.table(
  mdata,
  file = "transf.KAU.WBI.68.S.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

# KAU.WBI.75.C: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked the double reversed
# log.
mdata <- read.table("KAU.WBI.75.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 179 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.2565
##  - Box-Cox: 1.2986
##  - Center+scale: 1.2799
##  - Double Reversed Log_b(x+a): 1.0959
##  - Exp(x): 1.3064
##  - Log_b(x+a): 1.2915
##  - orderNorm (ORQ): 1.254
##  - sqrt(x + a): 1.2565
##  - Yeo-Johnson: 1.33
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized double reversed Log_b(x + a) Transformation with 179 nonmissing obs.:
##  Relevant statistics:
##  - a = 
##  - b = 10 
##  - max(x) = 1.132342 ; min(x) = 0.9793274 
##  - mean (before standardization) = 0.4150691 
##  - sd (before standardization) = 0.178979

# Tests plain log10 (log_x), not the double reversed log that was ranked best.
# Recorded p-value: 1.916e-05; no transformed file is written.
shapiro.test(predict(log_x(mdata$V3, b=10)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(log_x(mdata$V3, b = 10))
## W = 0.95537, p-value = 1.916e-05

# KAU.WBI.75.S: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked Yeo-Johnson.
mdata <- read.table("KAU.WBI.75.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 171 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.0672
##  - Box-Cox: 1.0594
##  - Center+scale: 1.0713
##  - Double Reversed Log_b(x+a): 1.8443
##  - Exp(x): 1.0754
##  - Log_b(x+a): 1.0633
##  - orderNorm (ORQ): 1.2621
##  - sqrt(x + a): 1.0672
##  - Yeo-Johnson: 1.0395
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 171 nonmissing obs.:
##  Estimated statistics:
##  - lambda = -4.99994 
##  - mean (before standardization) = 0.1944403 
##  - sd (before standardization) = 0.0002981089

# Tests the log10 transform (log_x) rather than the Yeo-Johnson pick.
# Recorded p-value: 0.4768; this transform is the one written out below.
shapiro.test(predict(log_x(mdata$V3, b=10)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(log_x(mdata$V3, b = 10))
## W = 0.99212, p-value = 0.4768

# Replace V3 with the standardized log10 transform tested above and save the
# table tab-separated, without header, row names or quoting.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
mdata$V3 <- predict(log_x(mdata$V3, b = 10))
write.table(
  mdata,
  file = "transf.KAU.WBI.75.S.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

KAU END MEASUREMENTS

# KAU.HI.C: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked center_scale.
mdata <- read.table("KAU.HI.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 157 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 0.9763
##  - Box-Cox: 1.0142
##  - Center+scale: 0.9428
##  - Double Reversed Log_b(x+a): 1.4943
##  - Exp(x): 0.9768
##  - Log_b(x+a): 1.2643
##  - orderNorm (ORQ): 1.2365
##  - sqrt(x + a): 1.0492
##  - Yeo-Johnson: 1.023
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## center_scale(x) Transformation with 157 nonmissing obs.
##  Estimated statistics:
##  - mean (before standardization) = 0.2721069 
##  - sd (before standardization) = 0.09873955

# Tests the center_scale pick (mean/sd standardization only).
# Recorded p-value: 0.02012; no transformed file is written.
shapiro.test(predict(center_scale(mdata$V3)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(center_scale(mdata$V3))
## W = 0.97962, p-value = 0.02012

# KAU.HI.S: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked Log_b(x + a).
mdata <- read.table("KAU.HI.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 167 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.4636
##  - Box-Cox: 1.1409
##  - Center+scale: 1.4801
##  - Double Reversed Log_b(x+a): 2.325
##  - Exp(x): 1.5094
##  - Log_b(x+a): 1.0881
##  - orderNorm (ORQ): 1.1229
##  - sqrt(x + a): 1.2271
##  - Yeo-Johnson: 1.2992
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Log_b(x + a) Transformation with 167 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - b = 10 
##  - mean (before standardization) = -0.8882112 
##  - sd (before standardization) = 0.2170759

# Try the ordered-quantile (ORQ) normalization; bestNormalize() ranked it
# second behind Log_b(x + a) in the recorded run.
trait <- mdata$V3
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# Evaluate the ORQ-transformed values. Previously they were inverted back with
# predict(..., inverse = TRUE) before plotting and testing, which reproduces
# the raw trait and therefore never assessed the transformation itself.
# NOTE: the output recorded after this chunk predates this fix; re-knit to
# refresh it.
p <- predict(trait2)
MASS::truehist(p, nbins = 12)

shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.94495, p-value = 4.409e-06

# KAU.IFM.C: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked orderNorm.
mdata <- read.table("KAU.IFM.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 143 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 2.5575
##  - Box-Cox: 1.38
##  - Center+scale: 2.6992
##  - Double Reversed Log_b(x+a): 3.4254
##  - Exp(x): 3.0836
##  - Log_b(x+a): 1.9693
##  - orderNorm (ORQ): 1.0909
##  - sqrt(x + a): 2.4101
##  - Yeo-Johnson: 1.4829
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 143 nonmissing obs and ties
##  - 143 unique values 
##  - Original quantiles:
##    0%   25%   50%   75%  100% 
## 0.225 0.313 0.337 0.373 0.951

# Fit the ordered-quantile (ORQ) normalization that bestNormalize() selected.
trait <- mdata$V3
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# Evaluate the ORQ-transformed values. Previously they were inverted back with
# predict(..., inverse = TRUE) before plotting and testing, which reproduces
# the raw trait; the recorded W = 0.61285 is therefore a statement about the
# untransformed data, not about the ORQ transform.
# NOTE: the output recorded after this chunk predates this fix; re-knit to
# refresh it.
p <- predict(trait2)
MASS::truehist(p, nbins = 12)

shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.61285, p-value < 2.2e-16

# KAU.IFM.S: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked Box-Cox.
mdata <- read.table("KAU.IFM.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 136 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.6346
##  - Box-Cox: 1.1695
##  - Center+scale: 1.6522
##  - Double Reversed Log_b(x+a): 2.7774
##  - Exp(x): 1.8552
##  - Log_b(x+a): 1.2161
##  - orderNorm (ORQ): 1.4636
##  - sqrt(x + a): 1.4702
##  - Yeo-Johnson: 1.3515
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Box Cox Transformation with 136 nonmissing obs.:
##  Estimated statistics:
##  - lambda = -0.9999576 
##  - mean (before standardization) = -3.045591 
##  - sd (before standardization) = 0.5804935

# Tests the Box-Cox pick. Recorded p-value: 0.1594; this transform is the one
# written out below.
shapiro.test(predict(boxcox(mdata$V3)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(boxcox(mdata$V3))
## W = 0.98546, p-value = 0.1594

# Replace V3 with the standardized Box-Cox transform tested above and save the
# table tab-separated, without header, row names or quoting.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
mdata$V3 <- predict(boxcox(mdata$V3))
write.table(
  mdata,
  file = "transf.KAU.IFM.S.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

# KAU.IFN.C: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked orderNorm (Exp(x)
# is absent from the recorded ranking for this trait).
mdata <- read.table("KAU.IFN.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 159 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.3859
##  - Box-Cox: 1.3353
##  - Center+scale: 1.6265
##  - Double Reversed Log_b(x+a): 2.074
##  - Log_b(x+a): 1.3859
##  - orderNorm (ORQ): 1.3077
##  - sqrt(x + a): 1.3738
##  - Yeo-Johnson: 1.3353
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 159 nonmissing obs and ties
##  - 160 unique values 
##  - Original quantiles:
##       0%      25%      50%      75%     100% 
##  100.360  314.542  431.958  566.532 1048.089

# Tests center_scale (mean/sd standardization only) rather than the orderNorm
# pick. Recorded p-value: 0.0003174; no transformed file is written.
shapiro.test(predict(center_scale(mdata$V3)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(center_scale(mdata$V3))
## W = 0.96328, p-value = 0.0003174

# KAU.IFN.S: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked orderNorm.
mdata <- read.table("KAU.IFN.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 171 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.406
##  - Box-Cox: 1.3568
##  - Center+scale: 2.2949
##  - Double Reversed Log_b(x+a): 3.605
##  - Exp(x): 7.1975
##  - Log_b(x+a): 1.4019
##  - orderNorm (ORQ): 1.2449
##  - sqrt(x + a): 1.6505
##  - Yeo-Johnson: 1.365
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 171 nonmissing obs and ties
##  - 172 unique values 
##  - Original quantiles:
##      0%     25%     50%     75%    100% 
##  30.669  60.251  86.589 141.431 346.183

# Fit the ordered-quantile (ORQ) normalization that bestNormalize() selected.
trait <- mdata$V3
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# Evaluate the ORQ-transformed values. Previously they were inverted back with
# predict(..., inverse = TRUE) before plotting and testing, which reproduces
# the raw trait and therefore never assessed the transformation itself.
# NOTE: the output recorded after this chunk predates this fix; re-knit to
# refresh it.
p <- predict(trait2)
MASS::truehist(p, nbins = 12)

shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.8853, p-value = 3.378e-10

# KAU.IFY.C: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked orderNorm.
mdata <- read.table("KAU.IFY.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 159 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.2681
##  - Box-Cox: 1.355
##  - Center+scale: 1.4635
##  - Double Reversed Log_b(x+a): 2.0671
##  - Exp(x): 7.7781
##  - Log_b(x+a): 1.2681
##  - orderNorm (ORQ): 1.1959
##  - sqrt(x + a): 1.3575
##  - Yeo-Johnson: 1.3682
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 159 nonmissing obs and ties
##  - 160 unique values 
##  - Original quantiles:
##      0%     25%     50%     75%    100% 
##  36.984 106.828 141.700 189.408 333.368

# Fit the ordered-quantile (ORQ) normalization that bestNormalize() selected.
trait <- mdata$V3
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# Evaluate the ORQ-transformed values. Previously they were inverted back with
# predict(..., inverse = TRUE) before plotting and testing, which reproduces
# the raw trait and therefore never assessed the transformation itself.
# NOTE: the output recorded after this chunk predates this fix; re-knit to
# refresh it.
p <- predict(trait2)
MASS::truehist(p, nbins = 12)

shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.96695, p-value = 0.0007458

# KAU.IFY.S: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked orderNorm.
mdata <- read.table("KAU.IFY.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 171 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 2.1181
##  - Box-Cox: 1.3724
##  - Center+scale: 3.2358
##  - Double Reversed Log_b(x+a): 3.6922
##  - Exp(x): 7.5318
##  - Log_b(x+a): 2.1099
##  - orderNorm (ORQ): 1.0281
##  - sqrt(x + a): 2.6102
##  - Yeo-Johnson: 1.2051
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 171 nonmissing obs and ties
##  - 171 unique values 
##  - Original quantiles:
##      0%     25%     50%     75%    100% 
##  20.457  23.339  27.538  36.905 119.162

# Fit the ordered-quantile (ORQ) normalization that bestNormalize() selected.
trait <- mdata$V3
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# Evaluate the ORQ-transformed values. Previously they were inverted back with
# predict(..., inverse = TRUE) before plotting and testing, which reproduces
# the raw trait and therefore never assessed the transformation itself.
# NOTE: the output recorded after this chunk predates this fix; re-knit to
# refresh it.
p <- predict(trait2)
MASS::truehist(p, nbins = 12)

shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.75123, p-value = 1.019e-15

# KAU.MFM.C: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked orderNorm.
mdata <- read.table("KAU.MFM.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 147 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 2.2322
##  - Box-Cox: 1.5034
##  - Center+scale: 2.828
##  - Double Reversed Log_b(x+a): 5.3198
##  - Exp(x): 5.7072
##  - Log_b(x+a): 1.7149
##  - orderNorm (ORQ): 1.172
##  - sqrt(x + a): 2.1358
##  - Yeo-Johnson: 1.3728
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 147 nonmissing obs and ties
##  - 148 unique values 
##  - Original quantiles:
##    0%   25%   50%   75%  100% 
## 0.315 0.523 0.593 0.674 2.522

# Tests Yeo-Johnson rather than the orderNorm pick.
# Recorded p-value: 0.0003336; no transformed file is written.
shapiro.test(predict(yeojohnson(mdata$V3)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(yeojohnson(mdata$V3))
## W = 0.96071, p-value = 0.0003336

# KAU.MFM.S: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked Yeo-Johnson.
mdata <- read.table("KAU.MFM.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 147 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.4699
##  - Box-Cox: 1.2996
##  - Center+scale: 1.535
##  - Double Reversed Log_b(x+a): 1.3395
##  - Exp(x): 2.6863
##  - Log_b(x+a): 1.345
##  - orderNorm (ORQ): 1.2589
##  - sqrt(x + a): 1.4634
##  - Yeo-Johnson: 1.1712
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 147 nonmissing obs.:
##  Estimated statistics:
##  - lambda = -4.99994 
##  - mean (before standardization) = 0.1604411 
##  - sd (before standardization) = 0.01155271

# Tests Box-Cox rather than the Yeo-Johnson pick.
# Recorded p-value: 0.02879; no transformed file is written.
shapiro.test(predict(boxcox(mdata$V3)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(boxcox(mdata$V3))
## W = 0.97978, p-value = 0.02879

# KAU.MFN.C: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked asinh(x) (Exp(x) is
# absent from the recorded ranking for this trait).
mdata <- read.table("KAU.MFN.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 160 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.075
##  - Box-Cox: 1.0881
##  - Center+scale: 1.67
##  - Double Reversed Log_b(x+a): 2.231
##  - Log_b(x+a): 1.075
##  - orderNorm (ORQ): 1.3156
##  - sqrt(x + a): 1.2806
##  - Yeo-Johnson: 1.0881
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized asinh(x) Transformation with 160 nonmissing obs.:
##  Relevant statistics:
##  - mean (before standardization) = 6.272711 
##  - sd (before standardization) = 0.4521011

# Tests Box-Cox rather than the asinh pick. Recorded p-value: 0.8875; this
# transform is the one written out below.
shapiro.test(predict(boxcox(mdata$V3)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(boxcox(mdata$V3))
## W = 0.99523, p-value = 0.8875

# Replace V3 with the standardized Box-Cox transform tested above and save the
# table tab-separated, without header, row names or quoting.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
mdata$V3 <- predict(boxcox(mdata$V3))
write.table(
  mdata,
  file = "transf.KAU.MFN.C.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)


# KAU.MFN.S: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked Box-Cox.
mdata <- read.table("KAU.MFN.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 171 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.2829
##  - Box-Cox: 1.1871
##  - Center+scale: 2.1339
##  - Double Reversed Log_b(x+a): 3.3886
##  - Exp(x): 11.866
##  - Log_b(x+a): 1.2829
##  - orderNorm (ORQ): 1.1914
##  - sqrt(x + a): 1.5119
##  - Yeo-Johnson: 1.1994
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Box Cox Transformation with 171 nonmissing obs.:
##  Estimated statistics:
##  - lambda = -0.8873626 
##  - mean (before standardization) = 1.107045 
##  - sd (before standardization) = 0.004606803

# Try the ordered-quantile (ORQ) normalization; bestNormalize() ranked it just
# behind Box-Cox in the recorded run (1.1914 vs 1.1871).
trait <- mdata$V3
trait2 <- orderNorm(trait)

## Warning in orderNorm(trait): Ties in data, Normal distribution not guaranteed

# Evaluate the ORQ-transformed values. Previously they were inverted back with
# predict(..., inverse = TRUE) before plotting and testing, which reproduces
# the raw trait and therefore never assessed the transformation itself.
# NOTE: the output recorded after this chunk predates this fix; re-knit to
# refresh it.
p <- predict(trait2)
MASS::truehist(p, nbins = 12)

shapiro.test(p)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.88633, p-value = 3.845e-10

# KAU.MFY.C: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked orderNorm. The
# recorded quantiles show a negative minimum (-5.501) and Box-Cox is absent
# from the ranking.
mdata <- read.table("KAU.MFY.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 160 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.6219
##  - Center+scale: 1.5431
##  - Double Reversed Log_b(x+a): 2.4048
##  - Exp(x): 4
##  - Log_b(x+a): 1.1497
##  - orderNorm (ORQ): 1.1275
##  - sqrt(x + a): 1.2511
##  - Yeo-Johnson: 1.4163
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 160 nonmissing obs and ties
##  - 161 unique values 
##  - Original quantiles:
##      0%     25%     50%     75%    100% 
##  -5.501 116.783 159.486 237.128 566.579

# Tests log_x with its default arguments (no explicit base, unlike the other
# log_x calls in this file) rather than the orderNorm pick. Recorded result:
# W = 0.4744, p-value < 2.2e-16; no transformed file is written.
shapiro.test(predict(log_x(mdata$V3)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(log_x(mdata$V3))
## W = 0.4744, p-value < 2.2e-16

# KAU.MYF.S: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked Yeo-Johnson.
# fill = TRUE lets read.table() accept rows with fewer fields than the rest;
# it is spelled out because T is an ordinary, reassignable variable.
# NOTE(review): the file name says "MYF" while the neighbouring traits are
# named "MFY" -- confirm this is the real file name on disk.
mdata <- read.table("KAU.MYF.S.txt", fill = TRUE)
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 171 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.2102
##  - Box-Cox: 1.0624
##  - Center+scale: 2.1357
##  - Double Reversed Log_b(x+a): 2.6898
##  - Exp(x): 17.4643
##  - Log_b(x+a): 1.2102
##  - orderNorm (ORQ): 1.1383
##  - sqrt(x + a): 1.3891
##  - Yeo-Johnson: 1.0466
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 171 nonmissing obs.:
##  Estimated statistics:
##  - lambda = -4.99994 
##  - mean (before standardization) = 0.2000024 
##  - sd (before standardization) = 1.526669e-08

# Tests a plain base-R asinh() of the raw values rather than the Yeo-Johnson
# pick. Recorded p-value: 0.01224; no transformed file is written.
shapiro.test(asinh(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  asinh(mdata$V3)
## W = 0.97943, p-value = 0.01224

# KAU.TYM.C: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked sqrt(x + a) with a
# shift of a = 4.010106.
mdata <- read.table("KAU.TYM.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 160 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.7706
##  - Center+scale: 1.1975
##  - Double Reversed Log_b(x+a): 1.9325
##  - Log_b(x+a): 1.1608
##  - orderNorm (ORQ): 1.2894
##  - sqrt(x + a): 1.0835
##  - Yeo-Johnson: 1.11
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized sqrt(x + a) Transformation with 160 nonmissing obs.:
##  Relevant statistics:
##  - a = 4.010106 
##  - mean (before standardization) = 17.71457 
##  - sd (before standardization) = 4.476314

# bestNormalize() selected sqrt(x + a) with a = 4.010106, i.e. V3 contains
# negative values. Plain sqrt() turns those into NaN ("NaNs produced" was
# recorded here), so they were left out of the Shapiro-Wilk test and then
# written to the output file as NaN. sqrt_x() applies the shift before taking
# the root, so every observation is kept; like the other bestNormalize
# transforms used in this script it standardizes the result by default.
shapiro.test(predict(sqrt_x(mdata$V3)))

# Output recorded before this fix, for sqrt(mdata$V3) with the NaNs dropped
# (re-knit to refresh):
## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(mdata$V3)
## W = 0.99479, p-value = 0.8474

# Store the shifted square-root transform and save the table tab-separated,
# without header, row names or quoting.
mdata$V3 <- predict(sqrt_x(mdata$V3))
write.table(
  mdata,
  file = "transf.KAU.TYM.C.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

# KAU.TYM.S: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked Box-Cox.
mdata <- read.table("KAU.TYM.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 171 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.1379
##  - Box-Cox: 1.1324
##  - Center+scale: 1.7942
##  - Double Reversed Log_b(x+a): 2.4552
##  - Exp(x): 13.2448
##  - Log_b(x+a): 1.1379
##  - orderNorm (ORQ): 1.2426
##  - sqrt(x + a): 1.3435
##  - Yeo-Johnson: 1.1324
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Box Cox Transformation with 171 nonmissing obs.:
##  Estimated statistics:
##  - lambda = -0.3263948 
##  - mean (before standardization) = 2.26126 
##  - sd (before standardization) = 0.1013365

# Tests the Box-Cox pick. Recorded p-value: 0.07627.
shapiro.test(predict(boxcox(mdata$V3)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(boxcox(mdata$V3))
## W = 0.98563, p-value = 0.07627

# Store the Box-Cox transform that was tested above before saving. Previously
# mdata was written unchanged, so "transf.KAU.TYM.S.txt" held the raw values
# (every other transf.* chunk in this file assigns the transform first).
mdata$V3 <- predict(boxcox(mdata$V3))
write.table(
  mdata,
  file = "transf.KAU.TYM.S.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

# KAU.TYN.C: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked sqrt(x + a) with
# a = 0.
mdata <- read.table("KAU.TYN.C.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 160 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.3288
##  - Box-Cox: 1.3025
##  - Center+scale: 1.3069
##  - Double Reversed Log_b(x+a): 1.7794
##  - Log_b(x+a): 1.3288
##  - orderNorm (ORQ): 1.3375
##  - sqrt(x + a): 1.25
##  - Yeo-Johnson: 1.3025
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized sqrt(x + a) Transformation with 160 nonmissing obs.:
##  Relevant statistics:
##  - a = 0 
##  - mean (before standardization) = 26.69154 
##  - sd (before standardization) = 5.637306

# Tests a plain base-R sqrt() of the raw values (the recorded shift is a = 0).
# Recorded p-value: 0.1684; this transform is the one written out below.
shapiro.test(sqrt(mdata$V3))

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(mdata$V3)
## W = 0.9876, p-value = 0.1684

# Replace V3 with its plain square root (not standardized, unlike the
# bestNormalize transforms written elsewhere in this file) and save the table
# tab-separated, without header, row names or quoting.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
mdata$V3 <- sqrt(mdata$V3)
write.table(
  mdata,
  file = "transf.KAU.TYN.C.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)


# KAU.TYN.S: read the trait table, plot the raw V3 histogram (12 bins) and
# rank candidate transformations; the recorded run picked asinh(x).
mdata <- read.table("KAU.TYN.S.txt")
MASS::truehist(mdata$V3, nbins = 12)

bestNormalize(mdata$V3)

## Best Normalizing transformation with 171 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.1983
##  - Box-Cox: 1.252
##  - Center+scale: 1.759
##  - Double Reversed Log_b(x+a): 3.0963
##  - Exp(x): 4.275
##  - Log_b(x+a): 1.1983
##  - orderNorm (ORQ): 1.3826
##  - sqrt(x + a): 1.3275
##  - Yeo-Johnson: 1.2479
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized asinh(x) Transformation with 171 nonmissing obs.:
##  Relevant statistics:
##  - mean (before standardization) = 5.88078 
##  - sd (before standardization) = 0.3841461

# Tests Box-Cox rather than the asinh pick. Recorded p-value: 0.07063.
shapiro.test(predict(boxcox(mdata$V3)))

## 
##  Shapiro-Wilk normality test
## 
## data:  predict(boxcox(mdata$V3))
## W = 0.98537, p-value = 0.07063

# Store the Box-Cox transform that was tested above before saving. Previously
# mdata was written unchanged, so "transf.KAU.TYN.S.txt" held the raw values
# (every other transf.* chunk in this file assigns the transform first).
mdata$V3 <- predict(boxcox(mdata$V3))
write.table(
  mdata,
  file = "transf.KAU.TYN.S.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

Transformation of GWAS values for TPA / KAU data

AGR

RGR

TUE

TPA END MEASUREMENTS

AREA

MRENDVI

PRI

WBI

KAU END MEASUREMENTS