# ref1 : https://www.isixsigma.com/tools-templates/normality/making-data-normal-using-box-cox-power-transformation/
library(MASS)
par(mfrow=c(3,2))
plot(lm(dist~speed,data=cars))
boxcox(lm(dist~speed,data=cars),lambda=seq(0,1,by=.1))

library(Matrix)
library(car)
library(stats)
library(MASS)
library(graphics)
# successful case
par(mfrow=c(3,2))
x <- rexp(100)
qqnorm(x)                 # Normal probability plot for original variable
boxcox(x~1)               # Illustration of Log-Likelihood profile
p <- box.cox.powers(x)    # Estimaton of Box-Cox lambda
'box.cox.powers' is deprecated.
Use 'powerTransform' instead.
See help("Deprecated") and help("car-deprecated").
y <- box.cox(x, p$lambda) # Box-Cox transformation
'box.cox' is deprecated.
Use 'bcPower' instead.
See help("Deprecated") and help("car-deprecated").
car::qqPlot(x); car::qqPlot(y) 
plot(density(x)); plot(density(y))

# unsuccessful case
par(mfrow=c(3,2))
x<-rnorm(1000)^3        
x<-x-min(x)+sd(x)
qqnorm(x)                     # Normal probability plot for original variable
boxcox(x~1)             # Illustration of Log-Likelihood profile
p <- box.cox.powers(x)    # Estimaton of Box-Cox lambda
'box.cox.powers' is deprecated.
Use 'powerTransform' instead.
See help("Deprecated") and help("car-deprecated").
y <- box.cox(x, p$lambda)   # Box-Cox transformation
'box.cox' is deprecated.
Use 'bcPower' instead.
See help("Deprecated") and help("car-deprecated").
car::qqPlot(x); car::qqPlot(y) 
plot(density(x)); plot(density(y))

###################################################################################################################
# transforming skewed data                                                                                        #
# ref1 : http://rcompanion.org/handbook/I_12.html                                                                 #
# ref2 : http://fmwww.bc.edu/repec/bocode/t/transint.html                                                         #
###################################################################################################################
#install.packages('rcompanion')
par(mfrow=c(4,4))
library(rcompanion)
Turbidity = c(1.0, 1.2, 1.1, 1.1, 2.4, 2.2, 2.6, 4.1, 5.0, 10.0, 4.0, 4.1, 4.2, 4.1, 5.1, 4.5, 5.0, 15.2, 10.0, 20.0, 1.1, 1.1, 1.2, 1.6, 2.2, 3.0, 4.0, 10.5)
plotNormalHistogram(Turbidity, main = "original data")
qqnorm(Turbidity, ylab="Sample Quantiles for Turbidity")
qqline(Turbidity, col="red")
# some transformations
T_sqrt = sqrt(Turbidity)
plotNormalHistogram(T_sqrt, main ="square root transformation")
T_cub = sign(Turbidity) * abs(Turbidity)^(1/3)
plotNormalHistogram(T_cub, main ="cube root transformation")
T_log = log(Turbidity)
plotNormalHistogram(T_log, main ="log transformation")
T_tuk = transformTukey(Turbidity, plotit=FALSE)

    lambda     W Shapiro.p.value
397   -0.1 0.935         0.08248

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 
plotNormalHistogram(T_tuk, main = "tukey's ladder of power transformation")
Box = boxcox(Turbidity ~ 1, lambda = seq(-6,6,0.1))
Cox = data.frame(Box$x, Box$y)            
Cox2 = Cox[with(Cox, order(-Cox$Box.y)),] 
Cox2[1,]                                  
lambda = Cox2[1, "Box.x"]                 # Extract that lambda
T_box = (Turbidity ^ lambda - 1)/lambda   # Transform the original data
plotNormalHistogram(T_box, main = "box-cox transformation")
# box-cox T for anova model 
Input =("
        Location Turbidity
        a        1.0
        a        1.2
        a        1.1
        a        1.1
        a        2.4
        a        2.2
        a        2.6
        a        4.1
        a        5.0
        a       10.0
        b        4.0
        b        4.1
        b        4.2
        b        4.1
        b        5.1
        b        4.5
        b        5.0
        b       15.2
        b       10.0
        b       20.0
        c        1.1
        c        1.1
        c        1.2
        c        1.6
        c        2.2
        c        3.0
        c        4.0
        c       10.5
        ")
Data = read.table(textConnection(Input),header=TRUE)
model = lm(Turbidity ~ Location, data=Data)
Anova(model, type = "II")
Anova Table (Type II tests)

Response: Turbidity
          Sum Sq Df F value  Pr(>F)  
Location  132.63  2  3.8651 0.03447 *
Residuals 428.95 25                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
x <- residuals(model)
plotNormalHistogram(x, main = "residuals of un-transformed data")
qqnorm(residuals(model), ylab="Sample Quantiles for residuals")
qqline(residuals(model), col="red")
plot(fitted(model),residuals(model))
Box = boxcox(Turbidity ~ Location,
             data = Data,
             lambda = seq(-6,6,0.1)
)
Cox = data.frame(Box$x, Box$y)
Cox2 = Cox[with(Cox, order(-Cox$Box.y)),]
Cox2[1,]
lambda = Cox2[1, "Box.x"]
Data$Turbidity_box = (Data$Turbidity ^ lambda - 1)/lambda   
boxplot(Turbidity_box ~ Location,
        data = Data,
        ylab="Box–Cox-transformed Turbidity",
        xlab="Location")
model = lm(Turbidity_box ~ Location, data=Data)
Anova(model, type="II")
Anova Table (Type II tests)

Response: Turbidity_box
          Sum Sq Df F value Pr(>F)   
Location  4.1643  2  6.6929 0.0047 **
Residuals 7.7774 25                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
x = residuals(model)
plotNormalHistogram(x, main = "box-cox T model")
qqnorm(residuals(model), ylab="Sample Quantiles for residuals")
qqline(residuals(model), col="red")
plot(fitted(model),residuals(model))

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQpgYGB7cn0KIyByZWYxIDogaHR0cHM6Ly93d3cuaXNpeHNpZ21hLmNvbS90b29scy10ZW1wbGF0ZXMvbm9ybWFsaXR5L21ha2luZy1kYXRhLW5vcm1hbC11c2luZy1ib3gtY294LXBvd2VyLXRyYW5zZm9ybWF0aW9uLwoKbGlicmFyeShNQVNTKQoKcGFyKG1mcm93PWMoMywyKSkKcGxvdChsbShkaXN0fnNwZWVkLGRhdGE9Y2FycykpCgpib3hjb3gobG0oZGlzdH5zcGVlZCxkYXRhPWNhcnMpLGxhbWJkYT1zZXEoMCwxLGJ5PS4xKSkKYGBgCgpgYGB7cn0KbGlicmFyeShNYXRyaXgpCmxpYnJhcnkoY2FyKQpsaWJyYXJ5KHN0YXRzKQpsaWJyYXJ5KE1BU1MpCmxpYnJhcnkoZ3JhcGhpY3MpCmBgYAoKYGBge3J9CiMxLiBCb3gtQ294IFRyYW5zZm9ybWF0aW9uCiMgc3VjY2Vzc2Z1bCBjYXNlCnBhcihtZnJvdz1jKDMsMikpCgp4IDwtIHJleHAoMTAwKQpxcW5vcm0oeCkgICAgICAgICAgICAgICAgICMgTm9ybWFsIHByb2JhYmlsaXR5IHBsb3QgZm9yIG9yaWdpbmFsIHZhcmlhYmxlCmJveGNveCh4fjEpICAgICAgICAgICAgICAgIyBJbGx1c3RyYXRpb24gb2YgTG9nLUxpa2VsaWhvb2QgcHJvZmlsZQoKcCA8LSBib3guY294LnBvd2Vycyh4KSAgICAjIEVzdGltYXRvbiBvZiBCb3gtQ294IGxhbWJkYQp5IDwtIGJveC5jb3goeCwgcCRsYW1iZGEpICMgQm94LUNveCB0cmFuc2Zvcm1hdGlvbgoKY2FyOjpxcVBsb3QoeCk7IGNhcjo6cXFQbG90KHkpIApwbG90KGRlbnNpdHkoeCkpOyBwbG90KGRlbnNpdHkoeSkpCmBgYAoKYGBge3J9CiMgdW5zdWNjZXNzZnVsIGNhc2UKcGFyKG1mcm93PWMoMywyKSkKCng8LXJub3JtKDEwMDApXjMgCQkKeDwteC1taW4oeCkrc2QoeCkKcXFub3JtKHgpCQkJICAgICAgICAgICMgTm9ybWFsIHByb2JhYmlsaXR5IHBsb3QgZm9yIG9yaWdpbmFsIHZhcmlhYmxlCmJveGNveCh4fjEpCSAgICAgICAgICAgICMgSWxsdXN0cmF0aW9uIG9mIExvZy1MaWtlbGlob29kIHByb2ZpbGUKCnAgPC0gYm94LmNveC5wb3dlcnMoeCkgICAgIyBFc3RpbWF0b24gb2YgQm94LUNveCBsYW1iZGEKeSA8LSBib3guY294KHgsIHAkbGFtYmRhKQkjIEJveC1Db3ggdHJhbnNmb3JtYXRpb24KCmNhcjo6cXFQbG90KHgpOyBjYXI6OnFxUGxvdCh5KSAKcGxvdChkZW5zaXR5KHgpKTsgcGxvdChkZW5zaXR5KHkpKQpgYGAKCmBgYHtyfQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjCiMgdHJhbnNmb3JtaW5nIHNrZXdlZCBkYXRhICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMKIyByZWYxIDogaHR0cDovL3Jjb21wYW5pb24ub3JnL2hhbmRib29rL0lfMTIuaHRtbCAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIwojIHJlZjIgOiBodHRwOi8vZm13d3cuYmMuZWR1L3JlcGVjL2JvY29kZS90L3RyYW5zaW50Lmh0bWwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMKI2luc3RhbGwucGFja2FnZXMoJ3Jjb21wYW5pb24nKQpwYXIobWZyb3c9Yyg0LDQpKQpsaWJyYXJ5KHJjb21wYW5pb24pCgpUdXJiaWRpdHkgPSBjKDEuMCwgMS4yLCAxLjEsIDEuMSwgMi40LCAyLjIsIDIuNiwgNC4xLCA1LjAsIDEwLjAsIDQuMCwgNC4xLCA0LjIsIDQuMSwgNS4xLCA0LjUsIDUuMCwgMTUuMiwgMTAuMCwgMjAuMCwgMS4xLCAxLjEsIDEuMiwgMS42LCAyLjIsIDMuMCwgNC4wLCAxMC41KQoKcGxvdE5vcm1hbEhpc3RvZ3JhbShUdXJiaWRpdHksIG1haW4gPSAib3JpZ2luYWwgZGF0YSIpCgpxcW5vcm0oVHVyYmlkaXR5LCB5bGFiPSJTYW1wbGUgUXVhbnRpbGVzIGZvciBUdXJiaWRpdHkiKQpxcWxpbmUoVHVyYmlkaXR5LCBjb2w9InJlZCIpCgojIHNvbWUgdHJhbnNmb3JtYXRpb25zClRfc3FydCA9IHNxcnQoVHVyYmlkaXR5KQpwbG90Tm9ybWFsSGlzdG9ncmFtKFRfc3FydCwgbWFpbiA9InNxdWFyZSByb290IHRyYW5zZm9ybWF0aW9uIikKClRfY3ViID0gc2lnbihUdXJiaWRpdHkpICogYWJzKFR1cmJpZGl0eSleKDEvMykKcGxvdE5vcm1hbEhpc3RvZ3JhbShUX2N1YiwgbWFpbiA9ImN1YmUgcm9vdCB0cmFuc2Zvcm1hdGlvbiIpCgpUX2xvZyA9IGxvZyhUdXJiaWRpdHkpCnBsb3ROb3JtYWxIaXN0b2dyYW0oVF9sb2csIG1haW4gPSJsb2cgdHJhbnNmb3JtYXRpb24iKQoKVF90dWsgPSB0cmFuc2Zvcm1UdWtleShUdXJiaWRpdHksIHBsb3RpdD1GQUxTRSkKcGxvdE5vcm1hbEhpc3RvZ3JhbShUX3R1aywgbWFpbiA9ICJ0dWtleSdzIGxhZGRlciBvZiBwb3dlciB0cmFuc2Zvcm1hdGlvbiIpCgpCb3ggPSBib3hjb3goVHVyYmlkaXR5IH4gMSwgbGFtYmRhID0gc2VxKC02LDYsMC4xKSkKQ294ID0gZGF0YS5mcmFtZShCb3gkeCwgQm94JHkpICAgICAgICAgICAgCkNveDIgPSBDb3hbd2l0aChDb3gsIG9yZGVyKC1Db3gkQm94LnkpKSxdIApDb3gyWzEsXSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKbGFtYmRhID0gQ294MlsxLCAiQm94LngiXSAgICAgICAgICAgICAgICAgIyBFeHRyYWN0IHRoYXQgbGFtYmRhClRfYm94ID0gKFR1cmJpZGl0eSBeIGxhbWJkYSAtIDEpL2xhbWJkYSAgICMgVHJhbnNmb3JtIHRoZSBvcmlnaW5hbCBkYXRhCnBsb3ROb3JtYWxIaXN0b2dyYW0oVF9ib3gsIG1haW4gPSAiYm94LWNveCB0cmFuc2Zvcm1hdGlvbiIpCgoKIyBib3gtY294IFQgZm9yIGFub3ZhIG1vZGVsIApJbnB1dCA9KCIKICAgICAgICBMb2NhdGlvbiBUdXJiaWRpdHkKICAgICAgICBhICAgICAgICAxLjAKICAgICAgICBhICAgICAgICAxLjIKICAgICAgICBhICAgICAgICAxLjEKICAgICAgICBhICAgICAgICAxLjEKICAgICAgICBhICAgICAgICAyLjQKICAgICAgICBhICAgICAgICAyLjIKICAgICAgICBhICAgICAgICAyLjYKICAgICAgICBhICAgICAgICA0LjEKICAgICAgICBhICAgICAgICA1LjAKICAgICAgICBhICAgICAgIDEwLjAKICAgICAgICBiICAgICAgICA0LjAKICAgICAgICBiICAgICAgICA0LjEKICAgICAgICBiICAgICAgICA0LjIKICAgICAgICBiICAgICAgICA0LjEKICAgICAgICBiICAgICAgICA1LjEKICAgICAgICBiICAgICAgICA0LjUKICAgICAgICBiICAgICAgICA1LjAKICAgICAgICBiICAgICAgIDE1LjIKICAgICAgICBiICAgICAgIDEwLjAKICAgICAgICBiICAgICAgIDIwLjAKICAgICAgICBjICAgICAgICAxLjEKICAgICAgICBjICAgICAgICAxLjEKICAgICAgICBjICAgICAgICAxLjIKICAgICAgICBjICAgICAgICAxLjYKICAgICAgICBjICAgICAgICAyLjIKICAgICAgICBjICAgICAgICAzLjAKICAgICAgICBjICAgICAgICA0LjAKICAgICAgICBjICAgICAgIDEwLjUKICAgICAgICAiKQpEYXRhID0gcmVhZC50YWJsZSh0ZXh0Q29ubmVjdGlvbihJbnB1dCksaGVhZGVyPVRSVUUpCgptb2RlbCA9IGxtKFR1cmJpZGl0eSB+IExvY2F0aW9uLCBkYXRhPURhdGEpCkFub3ZhKG1vZGVsLCB0eXBlID0gIklJIikKCnggPC0gcmVzaWR1YWxzKG1vZGVsKQpwbG90Tm9ybWFsSGlzdG9ncmFtKHgsIG1haW4gPSAicmVzaWR1YWxzIG9mIHVuLXRyYW5zZm9ybWVkIGRhdGEiKQoKcXFub3JtKHJlc2lkdWFscyhtb2RlbCksIHlsYWI9IlNhbXBsZSBRdWFudGlsZXMgZm9yIHJlc2lkdWFscyIpCnFxbGluZShyZXNpZHVhbHMobW9kZWwpLCBjb2w9InJlZCIpCnBsb3QoZml0dGVkKG1vZGVsKSxyZXNpZHVhbHMobW9kZWwpKQoKQm94ID0gYm94Y294KFR1cmJpZGl0eSB+IExvY2F0aW9uLAogICAgICAgICAgICAgZGF0YSA9IERhdGEsCiAgICAgICAgICAgICBsYW1iZGEgPSBzZXEoLTYsNiwwLjEpCikKCkNveCA9IGRhdGEuZnJhbWUoQm94JHgsIEJveCR5KQpDb3gyID0gQ294W3dpdGgoQ294LCBvcmRlcigtQ294JEJveC55KSksXQpDb3gyWzEsXQpsYW1iZGEgPSBDb3gyWzEsICJCb3gueCJdCkRhdGEkVHVyYmlkaXR5X2JveCA9IChEYXRhJFR1cmJpZGl0eSBeIGxhbWJkYSAtIDEpL2xhbWJkYSAgIApib3hwbG90KFR1cmJpZGl0eV9ib3ggfiBMb2NhdGlvbiwKICAgICAgICBkYXRhID0gRGF0YSwKICAgICAgICB5bGFiPSJCb3jigJNDb3gtdHJhbnNmb3JtZWQgVHVyYmlkaXR5IiwKICAgICAgICB4bGFiPSJMb2NhdGlvbiIpCgptb2RlbCA9IGxtKFR1cmJpZGl0eV9ib3ggfiBMb2NhdGlvbiwgZGF0YT1EYXRhKQpBbm92YShtb2RlbCwgdHlwZT0iSUkiKQp4ID0gcmVzaWR1YWxzKG1vZGVsKQpwbG90Tm9ybWFsSGlzdG9ncmFtKHgsIG1haW4gPSAiYm94LWNveCBUIG1vZGVsIikKCnFxbm9ybShyZXNpZHVhbHMobW9kZWwpLCB5bGFiPSJTYW1wbGUgUXVhbnRpbGVzIGZvciByZXNpZHVhbHMiKQpxcWxpbmUocmVzaWR1YWxzKG1vZGVsKSwgY29sPSJyZWQiKQpwbG90KGZpdHRlZChtb2RlbCkscmVzaWR1YWxzKG1vZGVsKSkKYGBgCg==