A simple write-up to test normality of recorded phenotypes for new Pareto Front metrics - including the gravity (G) component. The measurements were calculated by Arjun Chandrasekhar, and analyzed by Magda Julkowska as a part of NSF IOS funded research.

Normal distribution assumption testing:

Alpha under Control Conditions

pheno <- read.table("trait.optimal.alpha.C.tfam_aligned.txt")
pheno$V3[pheno$V3 == -9] <- NA
write.table(pheno, "trait.optimal.alpha.C.tfam_aligned_corr.txt", row.names = F)
y <- pheno$V3
y <- y[is.finite(y)]  # drop NA/NaN/Inf
y

##   [1] 0.3925000 0.7250000 0.2816667 0.4550000 0.1437500 0.8300000 0.3883333
##   [8] 0.4750000 0.4587500 0.6166667 0.6350000 0.3587500 0.1250000 0.6000000
##  [15] 0.7950000 0.3916667 0.6612500 0.5983333 0.5266667 0.6983333 0.2333333
##  [22] 0.2150000 0.6083333 0.7316667 0.7600000 0.5975000 0.5750000 0.4550000
##  [29] 0.6500000 0.5083333 0.1887500 0.6842857 0.6583333 0.1975000 0.7125000
##  [36] 0.6500000 0.5975000 0.5625000 0.5180000 0.6775000 0.6000000 0.5612500
##  [43] 0.4433333 0.3262500 0.6900000 0.6533333 0.7362500 0.4320000 0.6900000
##  [50] 0.6562500 0.3687500 0.4687500 0.2683333 0.2212500 0.3162500 0.4375000
##  [57] 0.6725000 0.7333333 0.5083333 0.6833333 0.3762500 0.6412500 0.8250000
##  [64] 0.4187500 0.4750000 0.3400000 0.2890000 0.5390000 0.3710000 0.8140000
##  [71] 0.4190000 0.6270000 0.5260000 0.7250000 0.1970000 0.6675000 0.6866667
##  [78] 0.5866667 0.4287500 0.4050000 0.7012500 0.6250000 0.6566667 0.3833333
##  [85] 0.5350000 0.4762500 0.3225000 0.4400000 0.8333333 0.5933333 0.7037500
##  [92] 0.4216667 0.4425000 0.3750000 0.2750000 0.3750000 0.6800000 0.4487500
##  [99] 0.5037500 0.3625000 0.4125000 0.8150000 0.5583333 0.4600000 0.4637500
## [106] 0.4983333 0.7183333 0.3850000 0.6025000 0.3550000 0.3150000 0.6933333
## [113] 0.6416667 0.5750000 0.5462500 0.2250000 0.1650000 0.3500000 0.6250000
## [120] 0.4375000 0.5650000 0.4883333 0.6250000 0.6100000 0.6237500 0.7312500
## [127] 0.3700000 0.6500000 0.4250000 0.7500000 0.5150000 0.2916667 0.1416667
## [134] 0.1166667 0.5816667 0.5583333 0.4764286 0.4233333 0.6025000 0.3883333
## [141] 0.6537500 0.7250000 0.1916667 0.6533333 0.4883333 0.6466667 0.5500000

First - let’s do some visuals:

hist(y, breaks = "FD", main = "Trait distribution", xlab = "Trait", col = "grey80", border = "white")
lines(density(y), lwd = 2)

qqnorm(y, main = "Q-Q plot of trait")
qqline(y, lwd = 2)

Now - let’s get some numbers:

Shapiro-Wilk (recommended for n <= 5000)

sh <- shapiro.test(y)
print(sh)

## 
##  Shapiro-Wilk normality test
## 
## data:  y
## W = 0.97417, p-value = 0.007089

Anderson-Darling (good power for many departures from normality)

ad <- nortest::ad.test(y)
  print(ad)

## 
##  Anderson-Darling normality test
## 
## data:  y
## A = 1.0484, p-value = 0.009076

D’Agostino-Pearson / Jarque-Bera style (skewness + kurtosis)

n <- length(y)
sk <- moments::skewness(y)
ku <- moments::kurtosis(y)  # by default, "raw" kurtosis (normal ~ 3)
jb <- n/6 * (sk^2 + ((ku - 3)^2)/4)  # Jarque–Bera statistic approximation
p_jb <- 1 - pchisq(jb, df = 2)
cat(sprintf("Skewness: %.3f\nKurtosis: %.3f (normal ~ 3)\nJarque-Bera: %.3f, p=%.3g\n",
              sk, ku, jb, p_jb))

## Skewness: -0.341
## Kurtosis: 2.389 (normal ~ 3)
## Jarque-Bera: 5.133, p=0.0768

Alpha under Salt Stress

pheno <- read.table("trait.optimal.alpha.S.tfam_aligned.txt")
pheno$V3[pheno$V3 == -9] <- NA
write.table(pheno, "trait.optimal.alpha.S.tfam_aligned_corr.txt", row.names = F)
y <- pheno$V3
y <- y[is.finite(y)]  # drop NA/NaN/Inf
y

##   [1] 0.2500000 0.7250000 0.1933333 0.7362500 0.0600000 0.5400000 0.3083333
##   [8] 0.2500000 0.4666667 0.6166667 0.7483333 0.2875000 0.0000000 0.4883333
##  [15] 0.5712500 0.2750000 0.4625000 0.5000000 0.5975000 0.3750000 0.3400000
##  [22] 0.1790000 0.4833333 0.4950000 0.2600000 0.2375000 0.3875000 0.3730000
##  [29] 0.6950000 0.1000000 0.3500000 0.6800000 0.6300000 0.8250000 0.5400000
##  [36] 0.5733333 0.7925000 0.7375000 0.3850000 0.6250000 0.7150000 0.1250000
##  [43] 0.7000000 0.0512500 0.8650000 0.5950000 0.6212500 0.1680000 0.5070000
##  [50] 0.5150000 0.3516667 0.4312500 0.2816667 0.1125000 0.5125000 0.3425000
##  [57] 0.6500000 0.5662500 0.4650000 0.4125000 0.2562500 0.5912500 0.6000000
##  [64] 0.1250000 0.4125000 0.3400000 0.4280000 0.5130000 0.2890000 0.5150000
##  [71] 0.3660000 0.3530000 0.3270000 0.4687500 0.2410000 0.4037500 0.7116667
##  [78] 0.5833333 0.2475000 0.6487500 0.8350000 0.4675000 0.3750000 0.5350000
##  [85] 0.5666667 0.4625000 0.6375000 0.1000000 0.6583333 0.2166667 0.5700000
##  [92] 0.5150000 0.0537500 0.6750000 0.1200000 0.4033333 0.6650000 0.4625000
##  [99] 0.4625000 0.5100000 0.6187500 0.5150000 0.1566667 0.6737500 0.2987500
## [106] 0.5416667 0.4716667 0.5383333 0.6050000 0.2800000 0.4475000 0.6250000
## [113] 0.5333333 0.5212500 0.3583333 0.2000000 0.5816667 0.5000000 0.5450000
## [120] 0.3266667 0.5250000 0.4820000 0.5716667 0.4337500 0.7166667 0.5816667
## [127] 0.2250000 0.3066667 0.0750000 0.5250000 0.3350000 0.3400000 0.3375000
## [134] 0.4500000 0.2250000 0.5083333 0.4000000 0.2316667 0.3366667 0.5350000
## [141] 0.3516667 0.2516667 0.4766667 0.2750000 0.6666667 0.5750000

First - let’s do some visuals:

hist(y, breaks = "FD", main = "Trait distribution", xlab = "Trait", col = "grey80", border = "white")
lines(density(y), lwd = 2)

qqnorm(y, main = "Q-Q plot of trait")
qqline(y, lwd = 2)

Now - let’s get some numbers:

Shapiro-Wilk (recommended for n <= 5000)

sh <- shapiro.test(y)
print(sh)

## 
##  Shapiro-Wilk normality test
## 
## data:  y
## W = 0.98771, p-value = 0.2245

Anderson-Darling (good power for many departures from normality)

#ad <- 
nortest::ad.test(y)

## 
##  Anderson-Darling normality test
## 
## data:  y
## A = 0.53801, p-value = 0.1653

  print(ad)

## 
##  Anderson-Darling normality test
## 
## data:  y
## A = 1.0484, p-value = 0.009076

D’Agostino-Pearson / Jarque-Bera style (skewness + kurtosis)

n <- length(y)
sk <- moments::skewness(y)
ku <- moments::kurtosis(y)  # by default, "raw" kurtosis (normal ~ 3)
jb <- n/6 * (sk^2 + ((ku - 3)^2)/4)  # Jarque–Bera statistic approximation
p_jb <- 1 - pchisq(jb, df = 2)
cat(sprintf("Skewness: %.3f\nKurtosis: %.3f (normal ~ 3)\nJarque-Bera: %.3f, p=%.3g\n",
              sk, ku, jb, p_jb))

## Skewness: -0.195
## Kurtosis: 2.472 (normal ~ 3)
## Jarque-Bera: 2.623, p=0.269

G under Control Conditions

pheno <- read.table("trait.optimal.G.C.tfam_aligned.txt")
pheno$V3[pheno$V3 == -9] <- NA
write.table(pheno, "trait.optimal.G.C.tfam_aligned_corr.txt", row.names = F)
y <- pheno$V3
y <- y[is.finite(y)]  # drop NA/NaN/Inf
y

##   [1]  7.500000e-02  1.000000e-01  3.333333e-02 -5.000000e-02  2.375000e-01
##   [6]  1.125000e-01  0.000000e+00  2.500000e-01  1.375000e-01  8.333333e-02
##  [11]  8.333333e-02  1.250000e-02  1.750000e-01  0.000000e+00 -2.500000e-02
##  [16]  1.333333e-01 -1.250000e-01  1.666667e-02 -3.333333e-02  1.833333e-01
##  [21]  6.666667e-02  0.000000e+00 -8.333333e-02  1.666667e-02  2.000000e-02
##  [26] -2.500000e-01  3.000000e-01  8.000000e-02 -2.000000e-01  8.333333e-02
##  [31]  2.125000e-01  2.142857e-01  1.000000e-01  2.250000e-01  1.250000e-01
##  [36]  3.333333e-02  7.500000e-02  1.750000e-01  5.000000e-02  1.125000e-01
##  [41] -2.000000e-01  3.750000e-02 -5.000000e-02  1.000000e-01  1.250000e-02
##  [46]  0.000000e+00  1.875000e-01  1.500000e-01  1.100000e-01  1.250000e-01
##  [51] -1.250000e-02 -7.500000e-02  1.166667e-01 -8.750000e-02 -5.000000e-02
##  [56]  1.375000e-01  0.000000e+00  1.666667e-02  8.333333e-02 -1.666667e-02
##  [61]  2.500000e-02  3.750000e-02 -2.000000e-01  1.250000e-01  4.000000e-02
##  [66]  1.333333e-01  1.100000e-01  1.000000e-01  2.000000e-02  5.000000e-02
##  [71]  2.400000e-01  1.000000e-01 -1.000000e-02 -7.500000e-02 -2.000000e-02
##  [76]  5.000000e-02  1.000000e-01 -6.666667e-02  3.750000e-02 -3.750000e-02
##  [81] -1.125000e-01  2.500000e-02  8.333333e-02 -3.333333e-02  1.333333e-01
##  [86]  0.000000e+00  5.000000e-02  2.166667e-01 -1.666667e-02 -1.666667e-02
##  [91]  8.750000e-02  1.833333e-01  1.250000e-01 -4.000000e-01  1.333333e-01
##  [96]  2.166667e-01  1.166667e-01 -1.250000e-02  3.750000e-02  2.500000e-02
## [101] -1.000000e-01  2.666667e-01  1.833333e-01 -3.750000e-02  7.500000e-02
## [106] -6.666667e-02  0.000000e+00  0.000000e+00 -5.000000e-02  6.666667e-02
## [111] -1.250000e-02  5.000000e-02  3.333333e-02 -5.000000e-02  2.375000e-01
## [116]  1.166667e-01  2.000000e-01 -2.500000e-01 -1.000000e-01  5.000000e-02
## [121] -1.000000e-01 -1.000000e-01  1.000000e-01  7.000000e-02 -3.750000e-02
## [126]  5.000000e-02  8.333333e-02 -5.000000e-02 -5.000000e-02 -5.000000e-02
## [131]  1.333333e-01  2.000000e-01  6.666667e-02  1.166667e-01  6.666667e-02
## [136]  3.333333e-02  3.357143e-01  1.833333e-01 -2.500000e-02  8.333333e-02
## [141] -2.500000e-02  9.250000e-18 -2.833333e-01 -1.666667e-02 -3.000000e-01
## [146]  3.333333e-02 -6.250000e-02

First - let’s do some visuals:

hist(y, breaks = "FD", main = "Trait distribution", xlab = "Trait", col = "grey80", border = "white")
lines(density(y), lwd = 2)

qqnorm(y, main = "Q-Q plot of trait")
qqline(y, lwd = 2)

Now - let’s get some numbers:

Shapiro-Wilk (recommended for n <= 5000)

sh <- shapiro.test(y)
print(sh)

## 
##  Shapiro-Wilk normality test
## 
## data:  y
## W = 0.9712, p-value = 0.003472

Anderson-Darling (good power for many departures from normality)

ad <- nortest::ad.test(y)
  print(ad)

## 
##  Anderson-Darling normality test
## 
## data:  y
## A = 0.93301, p-value = 0.01752

D’Agostino-Pearson / Jarque-Bera style (skewness + kurtosis)

n <- length(y)
sk <- moments::skewness(y)
ku <- moments::kurtosis(y)  # by default, "raw" kurtosis (normal ~ 3)
jb <- n/6 * (sk^2 + ((ku - 3)^2)/4)  # Jarque–Bera statistic approximation
p_jb <- 1 - pchisq(jb, df = 2)
cat(sprintf("Skewness: %.3f\nKurtosis: %.3f (normal ~ 3)\nJarque-Bera: %.3f, p=%.3g\n",
              sk, ku, jb, p_jb))

## Skewness: -0.563
## Kurtosis: 4.337 (normal ~ 3)
## Jarque-Bera: 18.703, p=8.68e-05

G under Salt Stress

pheno <- read.table("trait.optimal.G.S.tfam_aligned.txt")
pheno$V3[pheno$V3 == -9] <- NA
write.table(pheno, "trait.optimal.G.S.tfam_aligned_corr.txt", row.names = F)
y <- pheno$V3
y <- y[is.finite(y)]  # drop NA/NaN/Inf
y

##   [1]  0.03750000  0.35000000 -0.23333333 -0.17500000  0.06250000 -0.06250000
##   [7]  0.06666667  0.10000000  0.03333333  0.26666667  0.13333333  0.16250000
##  [13]  0.67500000  0.06666667 -0.05000000  0.33333333  0.01250000  0.18333333
##  [19]  0.00000000 -1.10000000  0.06666667 -0.27000000  0.05000000  0.06000000
##  [25]  0.18000000  0.87500000  0.12500000  0.35000000 -0.05000000  0.51250000
##  [31]  0.20000000  0.16000000  0.38333333  1.10000000  0.18333333  0.31666667
##  [37]  0.27500000 -0.55000000  0.08000000 -0.01250000  0.05000000 -0.08750000
##  [43]  0.15000000  0.02500000 -0.03333333  0.13750000 -0.08750000  0.08000000
##  [49]  0.25000000 -0.05000000  0.40000000 -0.03750000 -0.05000000  0.02500000
##  [55]  0.05000000  0.13750000 -0.08333333  0.22500000  0.40000000 -0.05000000
##  [61]  0.05000000  0.17500000  0.15000000  0.71250000  0.27500000  0.08000000
##  [67] -0.02000000  0.22000000  0.02000000  0.02000000 -0.11000000 -0.01000000
##  [73]  0.08000000 -0.11250000 -0.31000000  0.08750000  0.21666667  0.36666667
##  [79] -0.11250000  0.18750000  0.20000000  0.08750000  0.30000000  0.00000000
##  [85]  0.05000000  0.06250000  0.35000000  0.03333333  0.16666667  0.01666667
##  [91] -0.06250000  0.63333333  0.23750000  0.05000000  0.60000000  0.05000000
##  [97] -0.38333333  0.12500000 -0.23750000  0.17500000 -0.03750000 -0.08750000
## [103]  0.40000000  0.56250000  0.02500000  0.71666667  0.25000000 -0.05000000
## [109]  0.10000000 -0.01666667 -0.11250000  0.05000000  0.11666667  0.18750000
## [115] -0.33333333  0.06666667 -0.15000000  0.00000000 -0.10000000  0.20000000
## [121] -1.05000000 -0.30000000  0.00000000  0.07500000 -0.51666667 -0.28333333
## [127]  0.22500000  0.06666667  0.40000000  0.05000000  0.13333333  0.23333333
## [133]  0.07500000  0.00000000  0.01666667  0.42500000 -0.05000000 -0.16666667
## [139]  0.05000000  0.15000000 -0.01666667  0.06666667  0.41666667  0.45000000
## [145] -0.01666667 -0.12500000

First - let’s do some visuals:

hist(y, breaks = "FD", main = "Trait distribution", xlab = "Trait", col = "grey80", border = "white")
lines(density(y), lwd = 2)

qqnorm(y, main = "Q-Q plot of trait")
qqline(y, lwd = 2)

Now - let’s get some numbers:

Shapiro-Wilk (recommended for n <= 5000)

sh <- shapiro.test(y)
print(sh)

## 
##  Shapiro-Wilk normality test
## 
## data:  y
## W = 0.90677, p-value = 4.53e-08

Anderson-Darling (good power for many departures from normality)

#ad <- 
nortest::ad.test(y)

## 
##  Anderson-Darling normality test
## 
## data:  y
## A = 3.7265, p-value = 2.439e-09

  print(ad)

## 
##  Anderson-Darling normality test
## 
## data:  y
## A = 0.93301, p-value = 0.01752

D’Agostino-Pearson / Jarque-Bera style (skewness + kurtosis)

n <- length(y)
sk <- moments::skewness(y)
ku <- moments::kurtosis(y)  # by default, "raw" kurtosis (normal ~ 3)
jb <- n/6 * (sk^2 + ((ku - 3)^2)/4)  # Jarque–Bera statistic approximation
p_jb <- 1 - pchisq(jb, df = 2)
cat(sprintf("Skewness: %.3f\nKurtosis: %.3f (normal ~ 3)\nJarque-Bera: %.3f, p=%.3g\n",
              sk, ku, jb, p_jb))

## Skewness: -0.373
## Kurtosis: 7.696 (normal ~ 3)
## Jarque-Bera: 137.519, p=0

Distance under Control Conditions

pheno <- read.table("trait.optimal.pt.dist.C.tfam_aligned.txt")
pheno$V3[pheno$V3 == -9] <- NA
write.table(pheno, "trait.optimal.pt.dist.C.tfam_aligned_corr.txt", row.names = F)
y <- pheno$V3
y <- y[is.finite(y)]  # drop NA/NaN/Inf
y

##   [1]  108.138160   30.141398   66.372139   10.435085 1351.407706   12.345047
##   [7]   35.431202  202.653210   19.696191    3.511861   13.078911  174.571749
##  [13] 1746.081766   41.954239   10.513659   41.842746   12.609847   14.857870
##  [19]   47.714177    1.252295  422.416490 1420.195928   17.390426    6.041013
##  [25]   26.367183    2.291419   79.137011   21.597793    4.072800  458.809686
##  [31]   27.783039   14.600634   35.330620   62.133809   25.773820   19.951493
##  [37]    8.184837   65.566886  126.932776  103.110046    5.951883   13.261498
##  [43]   22.424597  302.094433   13.276339   32.301668   21.050471   78.107556
##  [49]   32.787928    6.739327  213.671071   33.846197   66.605535   35.065179
##  [55]  214.375047  114.164536   25.411503   69.535953  870.663103  463.667339
##  [61]   90.064599   45.265395    4.574525   27.591675    6.654747  143.806478
##  [67]  133.260335   69.941606   71.175888   22.593823  177.733739   65.248137
##  [73]   84.830800   28.096349   80.502093   74.041671   20.255109  122.890187
##  [79]   15.229126  119.104105   18.918267   53.326051  663.756858   39.850541
##  [85]   27.646989   58.156664   21.218437 1231.848504   11.931623  271.210669
##  [91]    9.470194   43.388089   10.110317    3.861678  139.709180  231.697644
##  [97]   26.736405   72.422540   16.018204   31.049073   18.850080   16.926488
## [103]  145.690269   35.222961  136.475813   18.035863   36.009279  199.313220
## [109]   10.530507  167.508743   10.585593   54.025920   27.038711   37.070434
## [115]   68.215859  169.825965  134.311859  244.150570   16.610092   52.215851
## [121]    2.418298  334.966561   29.275482   44.131412   43.487522   54.381604
## [127]   61.019376   38.627332   73.804338   16.832953   59.352052   72.976966
## [133]   18.949380  384.182359   10.923122   27.649444   40.092263   15.557702
## [139]   29.237500    9.664503   25.794224  103.925174   95.474830   20.340701
## [145]    7.878581   18.282450   16.664259

First - let’s do some visuals:

hist(y, breaks = "FD", main = "Trait distribution", xlab = "Trait", col = "grey80", border = "white")
lines(density(y), lwd = 2)

qqnorm(y, main = "Q-Q plot of trait")
qqline(y, lwd = 2)

Now - let’s get some numbers:

Shapiro-Wilk (recommended for n <= 5000)

sh <- shapiro.test(y)
print(sh)

## 
##  Shapiro-Wilk normality test
## 
## data:  y
## W = 0.42921, p-value < 2.2e-16

Anderson-Darling (good power for many departures from normality)

ad <- nortest::ad.test(y)
  print(ad)

## 
##  Anderson-Darling normality test
## 
## data:  y
## A = 28.298, p-value < 2.2e-16

D’Agostino-Pearson / Jarque-Bera style (skewness + kurtosis)

n <- length(y)
sk <- moments::skewness(y)
ku <- moments::kurtosis(y)  # by default, "raw" kurtosis (normal ~ 3)
jb <- n/6 * (sk^2 + ((ku - 3)^2)/4)  # Jarque–Bera statistic approximation
p_jb <- 1 - pchisq(jb, df = 2)
cat(sprintf("Skewness: %.3f\nKurtosis: %.3f (normal ~ 3)\nJarque-Bera: %.3f, p=%.3g\n",
              sk, ku, jb, p_jb))

## Skewness: 4.392
## Kurtosis: 23.626 (normal ~ 3)
## Jarque-Bera: 3078.210, p=0

Distance under Salt Stress

pheno <- read.table("trait.optimal.pt.dist.S.tfam_aligned.txt")
pheno$V3[pheno$V3 == -9] <- NA
write.table(pheno, "trait.optimal.pt.dist.S.tfam_aligned_corr.txt", row.names = F)
y <- pheno$V3
y <- y[is.finite(y)]  # drop NA/NaN/Inf
y

##   [1] 2.148712e+00 1.082508e+00 4.073213e+01 3.235350e+00 1.524937e+02
##   [6] 2.421165e+00 6.657706e+01 1.247843e+01 2.401958e-01 1.903473e-01
##  [11] 7.920330e-01 7.858670e+01 1.677544e+02 1.808478e+01 5.236054e-01
##  [16] 3.747291e+01 2.781654e+00 7.638735e-01 3.268116e+00 1.520000e-04
##  [21] 6.431960e+02 1.711250e+03 1.226688e+00 3.263415e+00 4.470739e+00
##  [26] 6.639455e-01 2.204882e+00 7.521949e-01 7.291830e-01 1.870656e+01
##  [31] 6.222517e-01 8.306098e-01 1.821510e-01 1.515020e-01 2.962203e-01
##  [36] 8.912023e+00 5.028825e-02 2.024666e+01 6.099293e+00 1.015679e+01
##  [41] 1.320005e-01 2.219239e+00 3.986750e-03 2.089759e+01 3.915172e-01
##  [46] 1.025288e+00 1.224009e+02 1.644496e+01 1.121709e+01 1.557262e+00
##  [51] 2.038223e+01 1.014905e+01 9.322021e+01 3.410802e+01 9.924297e+00
##  [56] 4.869872e+01 9.815235e-01 8.520413e+00 1.882276e+01 8.842347e+01
##  [61] 1.231976e+01 1.298385e+01 5.222550e-02 1.329832e+01 2.755623e-01
##  [66] 1.405821e+01 1.884315e+00 1.884438e+00 1.272932e+02 3.091614e+00
##  [71] 8.654340e+00 1.415860e+02 1.570090e+01 1.668601e+00 1.254543e+02
##  [76] 2.152426e+01 3.390944e+00 1.096712e+02 1.937047e+01 3.643801e+00
##  [81] 9.131500e-03 2.138184e+00 1.450974e+01 6.490710e-01 3.343057e-01
##  [86] 6.933774e+00 1.449108e-01 1.233285e+01 4.950908e-01 6.262271e+01
##  [91] 9.318416e+00 2.654571e+00 3.379163e+01 2.499640e+00 2.126328e-01
##  [96] 1.223865e+02 4.003100e-02 1.726352e+01 7.392477e-01 3.449403e-01
## [101] 1.985507e+00 2.420048e+00 4.742411e+01 6.599089e-01 1.721630e+01
## [106] 4.689806e+00 2.582590e+00 6.705422e+01 1.480298e+00 3.191254e+01
## [111] 1.988619e+00 1.226426e+01 4.221593e+00 1.280620e+01 3.286986e+02
## [116] 2.432353e+00 3.462559e+00 2.110248e+01 1.393420e+00 1.483306e+01
## [121] 2.830000e-04 1.827898e+00 1.261423e+00 3.642247e+00 7.994826e+01
## [126] 1.024490e+00 1.468993e+01 1.101423e-01 4.546213e+01 7.983970e-01
## [131] 3.779305e+00 5.771817e+00 1.179780e+01 1.325290e+00 1.748261e+00
## [136] 4.050743e-01 2.842478e+00 1.861722e+01 4.107853e-01 2.004206e+00
## [141] 6.708759e+00 3.062644e+00 2.282958e-01 1.543885e-01 2.937988e+00
## [146] 1.810371e+00

First - let’s do some visuals:

hist(y, breaks = "FD", main = "Trait distribution", xlab = "Trait", col = "grey80", border = "white")
lines(density(y), lwd = 2)

qqnorm(y, main = "Q-Q plot of trait")
qqline(y, lwd = 2)

Now - let’s get some numbers:

Shapiro-Wilk (recommended for n <= 5000)

sh <- shapiro.test(y)
print(sh)

## 
##  Shapiro-Wilk normality test
## 
## data:  y
## W = 0.20557, p-value < 2.2e-16

Anderson-Darling (good power for many departures from normality)

#ad <- 
nortest::ad.test(y)

## 
##  Anderson-Darling normality test
## 
## data:  y
## A = 38.396, p-value < 2.2e-16

  print(ad)

## 
##  Anderson-Darling normality test
## 
## data:  y
## A = 28.298, p-value < 2.2e-16

D’Agostino-Pearson / Jarque-Bera style (skewness + kurtosis)

n <- length(y)
sk <- moments::skewness(y)
ku <- moments::kurtosis(y)  # by default, "raw" kurtosis (normal ~ 3)
jb <- n/6 * (sk^2 + ((ku - 3)^2)/4)  # Jarque–Bera statistic approximation
p_jb <- 1 - pchisq(jb, df = 2)
cat(sprintf("Skewness: %.3f\nKurtosis: %.3f (normal ~ 3)\nJarque-Bera: %.3f, p=%.3g\n",
              sk, ku, jb, p_jb))

## Skewness: 9.265
## Kurtosis: 97.271 (normal ~ 3)
## Jarque-Bera: 56151.104, p=0

Normalization of non-normally distributed traits:

OK now we have established which traits need normalization - let’s do that.

First - let’s set up some helper functions:

is_num <- function(x) is.numeric(x) || is.integer(x)

## Rank-based inverse normal transformation (Blom’s formula by default)
INT <- function(x, ties.method = "average") {
  x <- as.numeric(x)
  ok <- is.finite(x)
  out <- rep(NA_real_, length(x))
  r <- rank(x[ok], ties.method = ties.method)
  n <- length(r)
  out[ok] <- qnorm((r - 3/8) / (n + 1/4))
  out
}

## Shift to make values strictly positive (needed for log/sqrt/BoxCox)
shift_to_positive <- function(x, eps = 1e-6) {
  x <- as.numeric(x)
  m <- min(x, na.rm = TRUE)
  if (!is.finite(m)) return(x)
  if (m <= 0) x <- x + (-m) + eps
  x
}

stopifnot(is_num(y))

Alpha under Control

pheno <- read.table("trait.optimal.alpha.C.tfam_aligned_corr.txt", header = T)
pheno

pheno$transformed <- INT(pheno$V3)
pheno

y <- pheno$transformed
y <- y[is.finite(y)]  # drop NA/NaN/Inf

n <- length(y)
sk <- moments::skewness(y)
ku <- moments::kurtosis(y)  # by default, "raw" kurtosis (normal ~ 3)
jb <- n/6 * (sk^2 + ((ku - 3)^2)/4)  # Jarque–Bera statistic approximation
p_jb <- 1 - pchisq(jb, df = 2)
cat(sprintf("Skewness: %.3f\nKurtosis: %.3f (normal ~ 3)\nJarque-Bera: %.3f, p=%.3g\n",
              sk, ku, jb, p_jb))

## Skewness: -0.000
## Kurtosis: 2.819 (normal ~ 3)
## Jarque-Bera: 0.201, p=0.905

hist(y, breaks = "FD", main = "Trait distribution", xlab = "Trait", col = "grey80", border = "white")
lines(density(y), lwd = 2)

qqnorm(y, main = "Q-Q plot of trait")
qqline(y, lwd = 2)

OK - looks good - let’s save this:

pheno2 <- pheno[,c(1:2,4)]
pheno2

write.table(pheno2, row.names = F, col.names = F, "trait.optimal.alpha.C.al.cor.transformed.txt")

Alpha under Salt

pheno <- read.table("trait.optimal.alpha.S.tfam_aligned_corr.txt", header = T)
pheno

pheno$transformed <- INT(pheno$V3)
pheno

y <- pheno$transformed
y <- y[is.finite(y)]  # drop NA/NaN/Inf

n <- length(y)
sk <- moments::skewness(y)
ku <- moments::kurtosis(y)  # by default, "raw" kurtosis (normal ~ 3)
jb <- n/6 * (sk^2 + ((ku - 3)^2)/4)  # Jarque–Bera statistic approximation
p_jb <- 1 - pchisq(jb, df = 2)
cat(sprintf("Skewness: %.3f\nKurtosis: %.3f (normal ~ 3)\nJarque-Bera: %.3f, p=%.3g\n",
              sk, ku, jb, p_jb))

## Skewness: 0.000
## Kurtosis: 2.818 (normal ~ 3)
## Jarque-Bera: 0.201, p=0.904

hist(y, breaks = "FD", main = "Trait distribution", xlab = "Trait", col = "grey80", border = "white")
lines(density(y), lwd = 2)

qqnorm(y, main = "Q-Q plot of trait")
qqline(y, lwd = 2)

OK - looks good - let’s save this:

pheno2 <- pheno[,c(1:2,4)]
pheno2

write.table(pheno2, row.names = F, col.names = F, "trait.optimal.alpha.S.al.cor.transformed.txt")

G under Control

pheno <- read.table("trait.optimal.G.C.tfam_aligned_corr.txt", header = T)
pheno

pheno$transformed <- INT(pheno$V3)
pheno

y <- pheno$transformed
y <- y[is.finite(y)]  # drop NA/NaN/Inf

n <- length(y)
sk <- moments::skewness(y)
ku <- moments::kurtosis(y)  # by default, "raw" kurtosis (normal ~ 3)
jb <- n/6 * (sk^2 + ((ku - 3)^2)/4)  # Jarque–Bera statistic approximation
p_jb <- 1 - pchisq(jb, df = 2)
cat(sprintf("Skewness: %.3f\nKurtosis: %.3f (normal ~ 3)\nJarque-Bera: %.3f, p=%.3g\n",
              sk, ku, jb, p_jb))

## Skewness: 0.002
## Kurtosis: 2.820 (normal ~ 3)
## Jarque-Bera: 0.199, p=0.905

hist(y, breaks = "FD", main = "Trait distribution", xlab = "Trait", col = "grey80", border = "white")
lines(density(y), lwd = 2)

qqnorm(y, main = "Q-Q plot of trait")
qqline(y, lwd = 2)

OK - looks good - let’s save this:

pheno2 <- pheno[,c(1:2,4)]
pheno2

write.table(pheno2, row.names = F, col.names = F, "trait.optimal.G.C.al.cor.transformed.txt")

G under Salt

pheno <- read.table("trait.optimal.G.S.tfam_aligned_corr.txt", header = T)
pheno

pheno$transformed <- INT(pheno$V3)
pheno

y <- pheno$transformed
y <- y[is.finite(y)]  # drop NA/NaN/Inf

n <- length(y)
sk <- moments::skewness(y)
ku <- moments::kurtosis(y)  # by default, "raw" kurtosis (normal ~ 3)
jb <- n/6 * (sk^2 + ((ku - 3)^2)/4)  # Jarque–Bera statistic approximation
p_jb <- 1 - pchisq(jb, df = 2)
cat(sprintf("Skewness: %.3f\nKurtosis: %.3f (normal ~ 3)\nJarque-Bera: %.3f, p=%.3g\n",
              sk, ku, jb, p_jb))

## Skewness: 0.000
## Kurtosis: 2.820 (normal ~ 3)
## Jarque-Bera: 0.197, p=0.906

hist(y, breaks = "FD", main = "Trait distribution", xlab = "Trait", col = "grey80", border = "white")
lines(density(y), lwd = 2)

qqnorm(y, main = "Q-Q plot of trait")
qqline(y, lwd = 2)

OK - looks good - let’s save this:

pheno2 <- pheno[,c(1:2,4)]
pheno2

write.table(pheno2, row.names = F, col.names = F, "trait.optimal.G.S.al.cor.transformed.txt")

Dist under Control

pheno <- read.table("trait.optimal.pt.dist.C.tfam_aligned_corr.txt", header = T)
pheno

pheno$transformed <- INT(pheno$V3)
pheno

y <- pheno$transformed
y <- y[is.finite(y)]  # drop NA/NaN/Inf

n <- length(y)
sk <- moments::skewness(y)
ku <- moments::kurtosis(y)  # by default, "raw" kurtosis (normal ~ 3)
jb <- n/6 * (sk^2 + ((ku - 3)^2)/4)  # Jarque–Bera statistic approximation
p_jb <- 1 - pchisq(jb, df = 2)
cat(sprintf("Skewness: %.3f\nKurtosis: %.3f (normal ~ 3)\nJarque-Bera: %.3f, p=%.3g\n",
              sk, ku, jb, p_jb))

## Skewness: 0.000
## Kurtosis: 2.819 (normal ~ 3)
## Jarque-Bera: 0.201, p=0.904

hist(y, breaks = "FD", main = "Trait distribution", xlab = "Trait", col = "grey80", border = "white")
lines(density(y), lwd = 2)

qqnorm(y, main = "Q-Q plot of trait")
qqline(y, lwd = 2)

OK - looks good - let’s save this:

pheno2 <- pheno[,c(1:2,4)]
pheno2

write.table(pheno2, row.names = F, col.names = F, "trait.optimal.pt.dist.C.al.cor.transformed.txt")

Dist under Salt

pheno <- read.table("trait.optimal.pt.dist.S.tfam_aligned_corr.txt", header = T)
pheno

pheno$transformed <- INT(pheno$V3)
pheno

y <- pheno$transformed
y <- y[is.finite(y)]  # drop NA/NaN/Inf

n <- length(y)
sk <- moments::skewness(y)
ku <- moments::kurtosis(y)  # by default, "raw" kurtosis (normal ~ 3)
jb <- n/6 * (sk^2 + ((ku - 3)^2)/4)  # Jarque–Bera statistic approximation
p_jb <- 1 - pchisq(jb, df = 2)
cat(sprintf("Skewness: %.3f\nKurtosis: %.3f (normal ~ 3)\nJarque-Bera: %.3f, p=%.3g\n",
              sk, ku, jb, p_jb))

## Skewness: 0.000
## Kurtosis: 2.818 (normal ~ 3)
## Jarque-Bera: 0.201, p=0.904

hist(y, breaks = "FD", main = "Trait distribution", xlab = "Trait", col = "grey80", border = "white")
lines(density(y), lwd = 2)

qqnorm(y, main = "Q-Q plot of trait")
qqline(y, lwd = 2)

OK - looks good - let’s save this:

pheno2 <- pheno[,c(1:2,4)]
pheno2

write.table(pheno2, row.names = F, col.names = F, "trait.optimal.pt.dist.S.al.cor.transformed.txt")

Normality testing for GWAS - new Pareto Front metrics

Normal distribution assumption testing:

Alpha under Control Conditions

Alpha under Salt Stress

G under Control Conditions

G under Salt Stress

Distance under Control Conditions

Distance under Salt Stress

Normalization of non-normally distributed traits:

Alpha under Control

Alpha under Salt

G under Control

G under Salt

Dist under Control

Dist under Salt