Read the data

# Load the built-in iris data set and keep a working copy under a short name.
data(iris)
df <- iris

# Preview the first six rows to check the column layout.
head(df)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

Normalize the data using the Yeo-Johnson transformation

# bestNormalize provides yeojohnson().
library(bestNormalize)

# Work on a copy so `df` keeps the raw measurements.
normalized <- df

# Replace each sepal column with its Yeo-Johnson-transformed values. `x.t` is
# the transformed vector stored on the fitted object; per the bestNormalize
# documentation, yeojohnson() also centers and scales it by default
# (standardize = TRUE).
normalized$Sepal.Length <- yeojohnson(df$Sepal.Length)$x.t
normalized$Sepal.Width <- yeojohnson(df$Sepal.Width)$x.t

Kolmogorov-Smirnov Test of Normality

# One-sample Kolmogorov-Smirnov test of the transformed sepal length against
# the standard normal CDF ("pnorm" with its default mean 0 and sd 1),
# requesting an exact p-value. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
ks.test(normalized$Sepal.Length, "pnorm", exact = TRUE)

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  normalized$Sepal.Length
## D = 0.087324, p-value = 0.1913
## alternative hypothesis: two-sided

# One-sample Kolmogorov-Smirnov test of the transformed sepal width against
# the standard normal CDF ("pnorm" with its default mean 0 and sd 1),
# requesting an exact p-value. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
ks.test(normalized$Sepal.Width, "pnorm", exact = TRUE)

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  normalized$Sepal.Width
## D = 0.087465, p-value = 0.1898
## alternative hypothesis: two-sided

Histogram of the normalized data

library(ggplot2)

# Use the classic theme for every plot drawn after this point.
theme_set(theme_classic())

# Make sure ggplot() receives a plain data frame.
normalized <- as.data.frame(normalized)

# 30-bin histogram of the transformed sepal length; `col` and `fill` are given
# as numeric colour codes (outline 2, fill 4).
ggplot(normalized, aes(x = Sepal.Length)) +
  geom_histogram(bins = 30, col = 2, fill = 4)

# Same histogram settings for the transformed sepal width.
ggplot(normalized, aes(x = Sepal.Width)) +
  geom_histogram(bins = 30, col = 2, fill = 4)

Follow me for more on:

RPubs
Telegram
1. Channel
2. Q & A
YouTube
Website
Aparat

Normalize Data Using bestNormalize Package in R

Afshin Motavali

April, 2022

Read the data

Normalize the data using the Yeo-Johnson transformation

Kolmogorov-Smirnov Test of Normality

Histogram of the normalized data

Follow me for more on: