# ref1 : https://www.isixsigma.com/tools-templates/normality/making-data-normal-using-box-cox-power-transformation/
library(MASS)
par(mfrow=c(3,2))
plot(lm(dist~speed,data=cars))
boxcox(lm(dist~speed,data=cars),lambda=seq(0,1,by=.1))

library(Matrix)
library(car)
library(stats)
library(MASS)
library(graphics)
# successful case
par(mfrow=c(3,2))
x <- rexp(100)
qqnorm(x) # Normal probability plot for original variable
boxcox(x~1) # Illustration of Log-Likelihood profile
p <- box.cox.powers(x) # Estimaton of Box-Cox lambda
'box.cox.powers' is deprecated.
Use 'powerTransform' instead.
See help("Deprecated") and help("car-deprecated").
y <- box.cox(x, p$lambda) # Box-Cox transformation
'box.cox' is deprecated.
Use 'bcPower' instead.
See help("Deprecated") and help("car-deprecated").
car::qqPlot(x); car::qqPlot(y)
plot(density(x)); plot(density(y))

# unsuccessful case
par(mfrow=c(3,2))
x<-rnorm(1000)^3
x<-x-min(x)+sd(x)
qqnorm(x) # Normal probability plot for original variable
boxcox(x~1) # Illustration of Log-Likelihood profile
p <- box.cox.powers(x) # Estimaton of Box-Cox lambda
'box.cox.powers' is deprecated.
Use 'powerTransform' instead.
See help("Deprecated") and help("car-deprecated").
y <- box.cox(x, p$lambda) # Box-Cox transformation
'box.cox' is deprecated.
Use 'bcPower' instead.
See help("Deprecated") and help("car-deprecated").
car::qqPlot(x); car::qqPlot(y)
plot(density(x)); plot(density(y))

###################################################################################################################
# transforming skewed data #
# ref1 : http://rcompanion.org/handbook/I_12.html #
# ref2 : http://fmwww.bc.edu/repec/bocode/t/transint.html #
###################################################################################################################
#install.packages('rcompanion')
par(mfrow=c(4,4))
library(rcompanion)
Turbidity = c(1.0, 1.2, 1.1, 1.1, 2.4, 2.2, 2.6, 4.1, 5.0, 10.0, 4.0, 4.1, 4.2, 4.1, 5.1, 4.5, 5.0, 15.2, 10.0, 20.0, 1.1, 1.1, 1.2, 1.6, 2.2, 3.0, 4.0, 10.5)
plotNormalHistogram(Turbidity, main = "original data")
qqnorm(Turbidity, ylab="Sample Quantiles for Turbidity")
qqline(Turbidity, col="red")
# some transformations
T_sqrt = sqrt(Turbidity)
plotNormalHistogram(T_sqrt, main ="square root transformation")
T_cub = sign(Turbidity) * abs(Turbidity)^(1/3)
plotNormalHistogram(T_cub, main ="cube root transformation")
T_log = log(Turbidity)
plotNormalHistogram(T_log, main ="log transformation")
T_tuk = transformTukey(Turbidity, plotit=FALSE)
lambda W Shapiro.p.value
397 -0.1 0.935 0.08248
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
plotNormalHistogram(T_tuk, main = "tukey's ladder of power transformation")
Box = boxcox(Turbidity ~ 1, lambda = seq(-6,6,0.1))
Cox = data.frame(Box$x, Box$y)
Cox2 = Cox[with(Cox, order(-Cox$Box.y)),]
Cox2[1,]
lambda = Cox2[1, "Box.x"] # Extract that lambda
T_box = (Turbidity ^ lambda - 1)/lambda # Transform the original data
plotNormalHistogram(T_box, main = "box-cox transformation")
# box-cox T for anova model
Input =("
Location Turbidity
a 1.0
a 1.2
a 1.1
a 1.1
a 2.4
a 2.2
a 2.6
a 4.1
a 5.0
a 10.0
b 4.0
b 4.1
b 4.2
b 4.1
b 5.1
b 4.5
b 5.0
b 15.2
b 10.0
b 20.0
c 1.1
c 1.1
c 1.2
c 1.6
c 2.2
c 3.0
c 4.0
c 10.5
")
Data = read.table(textConnection(Input),header=TRUE)
model = lm(Turbidity ~ Location, data=Data)
Anova(model, type = "II")
Anova Table (Type II tests)
Response: Turbidity
Sum Sq Df F value Pr(>F)
Location 132.63 2 3.8651 0.03447 *
Residuals 428.95 25
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
x <- residuals(model)
plotNormalHistogram(x, main = "residuals of un-transformed data")
qqnorm(residuals(model), ylab="Sample Quantiles for residuals")
qqline(residuals(model), col="red")
plot(fitted(model),residuals(model))
Box = boxcox(Turbidity ~ Location,
data = Data,
lambda = seq(-6,6,0.1)
)
Cox = data.frame(Box$x, Box$y)
Cox2 = Cox[with(Cox, order(-Cox$Box.y)),]
Cox2[1,]
lambda = Cox2[1, "Box.x"]
Data$Turbidity_box = (Data$Turbidity ^ lambda - 1)/lambda
boxplot(Turbidity_box ~ Location,
data = Data,
ylab="Box–Cox-transformed Turbidity",
xlab="Location")
model = lm(Turbidity_box ~ Location, data=Data)
Anova(model, type="II")
Anova Table (Type II tests)
Response: Turbidity_box
Sum Sq Df F value Pr(>F)
Location 4.1643 2 6.6929 0.0047 **
Residuals 7.7774 25
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
x = residuals(model)
plotNormalHistogram(x, main = "box-cox T model")
qqnorm(residuals(model), ylab="Sample Quantiles for residuals")
qqline(residuals(model), col="red")
plot(fitted(model),residuals(model))

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQpgYGB7cn0KIyByZWYxIDogaHR0cHM6Ly93d3cuaXNpeHNpZ21hLmNvbS90b29scy10ZW1wbGF0ZXMvbm9ybWFsaXR5L21ha2luZy1kYXRhLW5vcm1hbC11c2luZy1ib3gtY294LXBvd2VyLXRyYW5zZm9ybWF0aW9uLwoKbGlicmFyeShNQVNTKQoKcGFyKG1mcm93PWMoMywyKSkKcGxvdChsbShkaXN0fnNwZWVkLGRhdGE9Y2FycykpCgpib3hjb3gobG0oZGlzdH5zcGVlZCxkYXRhPWNhcnMpLGxhbWJkYT1zZXEoMCwxLGJ5PS4xKSkKYGBgCgpgYGB7cn0KbGlicmFyeShNYXRyaXgpCmxpYnJhcnkoY2FyKQpsaWJyYXJ5KHN0YXRzKQpsaWJyYXJ5KE1BU1MpCmxpYnJhcnkoZ3JhcGhpY3MpCmBgYAoKYGBge3J9CiMxLiBCb3gtQ294IFRyYW5zZm9ybWF0aW9uCiMgc3VjY2Vzc2Z1bCBjYXNlCnBhcihtZnJvdz1jKDMsMikpCgp4IDwtIHJleHAoMTAwKQpxcW5vcm0oeCkgICAgICAgICAgICAgICAgICMgTm9ybWFsIHByb2JhYmlsaXR5IHBsb3QgZm9yIG9yaWdpbmFsIHZhcmlhYmxlCmJveGNveCh4fjEpICAgICAgICAgICAgICAgIyBJbGx1c3RyYXRpb24gb2YgTG9nLUxpa2VsaWhvb2QgcHJvZmlsZQoKcCA8LSBib3guY294LnBvd2Vycyh4KSAgICAjIEVzdGltYXRvbiBvZiBCb3gtQ294IGxhbWJkYQp5IDwtIGJveC5jb3goeCwgcCRsYW1iZGEpICMgQm94LUNveCB0cmFuc2Zvcm1hdGlvbgoKY2FyOjpxcVBsb3QoeCk7IGNhcjo6cXFQbG90KHkpIApwbG90KGRlbnNpdHkoeCkpOyBwbG90KGRlbnNpdHkoeSkpCmBgYAoKYGBge3J9CiMgdW5zdWNjZXNzZnVsIGNhc2UKcGFyKG1mcm93PWMoMywyKSkKCng8LXJub3JtKDEwMDApXjMgCQkKeDwteC1taW4oeCkrc2QoeCkKcXFub3JtKHgpCQkJICAgICAgICAgICMgTm9ybWFsIHByb2JhYmlsaXR5IHBsb3QgZm9yIG9yaWdpbmFsIHZhcmlhYmxlCmJveGNveCh4fjEpCSAgICAgICAgICAgICMgSWxsdXN0cmF0aW9uIG9mIExvZy1MaWtlbGlob29kIHByb2ZpbGUKCnAgPC0gYm94LmNveC5wb3dlcnMoeCkgICAgIyBFc3RpbWF0b24gb2YgQm94LUNveCBsYW1iZGEKeSA8LSBib3guY294KHgsIHAkbGFtYmRhKQkjIEJveC1Db3ggdHJhbnNmb3JtYXRpb24KCmNhcjo6cXFQbG90KHgpOyBjYXI6OnFxUGxvdCh5KSAKcGxvdChkZW5zaXR5KHgpKTsgcGxvdChkZW5zaXR5KHkpKQpgYGAKCmBgYHtyfQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjCiMgdHJhbnNmb3JtaW5nIHNrZXdlZCBkYXRhICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMKIyByZWYxIDogaHR0cDovL3Jjb21wYW5pb24ub3JnL2hhbmRib29rL0lfMTIuaHRtbCAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIwojIHJlZjIgOiBodHRwOi8vZm13d3cuYmMuZWR1L3JlcGVjL2JvY29kZS90L3RyYW5zaW50Lmh0bWwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMKI2luc3RhbGwucGFja2FnZXMoJ3Jjb21wYW5pb24nKQpwYXIobWZyb3c9Yyg0LDQpKQpsaWJyYXJ5KHJjb21wYW5pb24pCgpUdXJiaWRpdHkgPSBjKDEuMCwgMS4yLCAxLjEsIDEuMSwgMi40LCAyLjIsIDIuNiwgNC4xLCA1LjAsIDEwLjAsIDQuMCwgNC4xLCA0LjIsIDQuMSwgNS4xLCA0LjUsIDUuMCwgMTUuMiwgMTAuMCwgMjAuMCwgMS4xLCAxLjEsIDEuMiwgMS42LCAyLjIsIDMuMCwgNC4wLCAxMC41KQoKcGxvdE5vcm1hbEhpc3RvZ3JhbShUdXJiaWRpdHksIG1haW4gPSAib3JpZ2luYWwgZGF0YSIpCgpxcW5vcm0oVHVyYmlkaXR5LCB5bGFiPSJTYW1wbGUgUXVhbnRpbGVzIGZvciBUdXJiaWRpdHkiKQpxcWxpbmUoVHVyYmlkaXR5LCBjb2w9InJlZCIpCgojIHNvbWUgdHJhbnNmb3JtYXRpb25zClRfc3FydCA9IHNxcnQoVHVyYmlkaXR5KQpwbG90Tm9ybWFsSGlzdG9ncmFtKFRfc3FydCwgbWFpbiA9InNxdWFyZSByb290IHRyYW5zZm9ybWF0aW9uIikKClRfY3ViID0gc2lnbihUdXJiaWRpdHkpICogYWJzKFR1cmJpZGl0eSleKDEvMykKcGxvdE5vcm1hbEhpc3RvZ3JhbShUX2N1YiwgbWFpbiA9ImN1YmUgcm9vdCB0cmFuc2Zvcm1hdGlvbiIpCgpUX2xvZyA9IGxvZyhUdXJiaWRpdHkpCnBsb3ROb3JtYWxIaXN0b2dyYW0oVF9sb2csIG1haW4gPSJsb2cgdHJhbnNmb3JtYXRpb24iKQoKVF90dWsgPSB0cmFuc2Zvcm1UdWtleShUdXJiaWRpdHksIHBsb3RpdD1GQUxTRSkKcGxvdE5vcm1hbEhpc3RvZ3JhbShUX3R1aywgbWFpbiA9ICJ0dWtleSdzIGxhZGRlciBvZiBwb3dlciB0cmFuc2Zvcm1hdGlvbiIpCgpCb3ggPSBib3hjb3goVHVyYmlkaXR5IH4gMSwgbGFtYmRhID0gc2VxKC02LDYsMC4xKSkKQ294ID0gZGF0YS5mcmFtZShCb3gkeCwgQm94JHkpICAgICAgICAgICAgCkNveDIgPSBDb3hbd2l0aChDb3gsIG9yZGVyKC1Db3gkQm94LnkpKSxdIApDb3gyWzEsXSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKbGFtYmRhID0gQ294MlsxLCAiQm94LngiXSAgICAgICAgICAgICAgICAgIyBFeHRyYWN0IHRoYXQgbGFtYmRhClRfYm94ID0gKFR1cmJpZGl0eSBeIGxhbWJkYSAtIDEpL2xhbWJkYSAgICMgVHJhbnNmb3JtIHRoZSBvcmlnaW5hbCBkYXRhCnBsb3ROb3JtYWxIaXN0b2dyYW0oVF9ib3gsIG1haW4gPSAiYm94LWNveCB0cmFuc2Zvcm1hdGlvbiIpCgoKIyBib3gtY294IFQgZm9yIGFub3ZhIG1vZGVsIApJbnB1dCA9KCIKICAgICAgICBMb2NhdGlvbiBUdXJiaWRpdHkKICAgICAgICBhICAgICAgICAxLjAKICAgICAgICBhICAgICAgICAxLjIKICAgICAgICBhICAgICAgICAxLjEKICAgICAgICBhICAgICAgICAxLjEKICAgICAgICBhICAgICAgICAyLjQKICAgICAgICBhICAgICAgICAyLjIKICAgICAgICBhICAgICAgICAyLjYKICAgICAgICBhICAgICAgICA0LjEKICAgICAgICBhICAgICAgICA1LjAKICAgICAgICBhICAgICAgIDEwLjAKICAgICAgICBiICAgICAgICA0LjAKICAgICAgICBiICAgICAgICA0LjEKICAgICAgICBiICAgICAgICA0LjIKICAgICAgICBiICAgICAgICA0LjEKICAgICAgICBiICAgICAgICA1LjEKICAgICAgICBiICAgICAgICA0LjUKICAgICAgICBiICAgICAgICA1LjAKICAgICAgICBiICAgICAgIDE1LjIKICAgICAgICBiICAgICAgIDEwLjAKICAgICAgICBiICAgICAgIDIwLjAKICAgICAgICBjICAgICAgICAxLjEKICAgICAgICBjICAgICAgICAxLjEKICAgICAgICBjICAgICAgICAxLjIKICAgICAgICBjICAgICAgICAxLjYKICAgICAgICBjICAgICAgICAyLjIKICAgICAgICBjICAgICAgICAzLjAKICAgICAgICBjICAgICAgICA0LjAKICAgICAgICBjICAgICAgIDEwLjUKICAgICAgICAiKQpEYXRhID0gcmVhZC50YWJsZSh0ZXh0Q29ubmVjdGlvbihJbnB1dCksaGVhZGVyPVRSVUUpCgptb2RlbCA9IGxtKFR1cmJpZGl0eSB+IExvY2F0aW9uLCBkYXRhPURhdGEpCkFub3ZhKG1vZGVsLCB0eXBlID0gIklJIikKCnggPC0gcmVzaWR1YWxzKG1vZGVsKQpwbG90Tm9ybWFsSGlzdG9ncmFtKHgsIG1haW4gPSAicmVzaWR1YWxzIG9mIHVuLXRyYW5zZm9ybWVkIGRhdGEiKQoKcXFub3JtKHJlc2lkdWFscyhtb2RlbCksIHlsYWI9IlNhbXBsZSBRdWFudGlsZXMgZm9yIHJlc2lkdWFscyIpCnFxbGluZShyZXNpZHVhbHMobW9kZWwpLCBjb2w9InJlZCIpCnBsb3QoZml0dGVkKG1vZGVsKSxyZXNpZHVhbHMobW9kZWwpKQoKQm94ID0gYm94Y294KFR1cmJpZGl0eSB+IExvY2F0aW9uLAogICAgICAgICAgICAgZGF0YSA9IERhdGEsCiAgICAgICAgICAgICBsYW1iZGEgPSBzZXEoLTYsNiwwLjEpCikKCkNveCA9IGRhdGEuZnJhbWUoQm94JHgsIEJveCR5KQpDb3gyID0gQ294W3dpdGgoQ294LCBvcmRlcigtQ294JEJveC55KSksXQpDb3gyWzEsXQpsYW1iZGEgPSBDb3gyWzEsICJCb3gueCJdCkRhdGEkVHVyYmlkaXR5X2JveCA9IChEYXRhJFR1cmJpZGl0eSBeIGxhbWJkYSAtIDEpL2xhbWJkYSAgIApib3hwbG90KFR1cmJpZGl0eV9ib3ggfiBMb2NhdGlvbiwKICAgICAgICBkYXRhID0gRGF0YSwKICAgICAgICB5bGFiPSJCb3jigJNDb3gtdHJhbnNmb3JtZWQgVHVyYmlkaXR5IiwKICAgICAgICB4bGFiPSJMb2NhdGlvbiIpCgptb2RlbCA9IGxtKFR1cmJpZGl0eV9ib3ggfiBMb2NhdGlvbiwgZGF0YT1EYXRhKQpBbm92YShtb2RlbCwgdHlwZT0iSUkiKQp4ID0gcmVzaWR1YWxzKG1vZGVsKQpwbG90Tm9ybWFsSGlzdG9ncmFtKHgsIG1haW4gPSAiYm94LWNveCBUIG1vZGVsIikKCnFxbm9ybShyZXNpZHVhbHMobW9kZWwpLCB5bGFiPSJTYW1wbGUgUXVhbnRpbGVzIGZvciByZXNpZHVhbHMiKQpxcWxpbmUocmVzaWR1YWxzKG1vZGVsKSwgY29sPSJyZWQiKQpwbG90KGZpdHRlZChtb2RlbCkscmVzaWR1YWxzKG1vZGVsKSkKYGBgCg==