# NOTE: the workspace is deliberately not cleared here. `rm(list = ls())` would
# delete every object in the caller's global environment when this script is
# sourced, yet it neither detaches packages nor resets options, so it does not
# produce a clean session either. Restart R to run this script from a clean
# state.
####################
#Methods of Data Normalization
#1. Z-score Normalization(Standardization)
#2. Robust Scaler
#3. Min-Max Normalization
#4. Mean Normalization
#5. Unit Length
#####################1. Z-score Normalization(Standardization)
# Example data: two integer columns, X1 = 1:5 and X2 = 6:10 (matrix() fills
# its values column by column).
df <- data.frame(matrix(1:10, ncol = 2))
df
##   X1 X2
## 1  1  6
## 2  2  7
## 3  3  8
## 4  4  9
## 5  5 10
# Z-score every column: subtract the column mean, then divide by the column
# standard deviation. TRUE is spelled out because `T` is an ordinary variable
# that can be reassigned, whereas TRUE is a reserved word.
# The outer parentheses print the result in addition to assigning it.
(centered.x <- scale(df, center = TRUE, scale = TRUE))
##              X1         X2
## [1,] -1.2649111 -1.2649111
## [2,] -0.6324555 -0.6324555
## [3,]  0.0000000  0.0000000
## [4,]  0.6324555  0.6324555
## [5,]  1.2649111  1.2649111
## attr(,"scaled:center")
## X1 X2 
##  3  8 
## attr(,"scaled:scale")
##       X1       X2 
## 1.581139 1.581139
# Quartiles of every column, collected into a single matrix with one column
# per variable (quantile() returns five values by default).
vapply(df, quantile, FUN.VALUE = numeric(5))
##      X1 X2
## 0%    1  6
## 25%   2  7
## 50%   3  8
## 75%   4  9
## 100%  5 10
# Column means, returned as a named list.
lapply(X = df, FUN = mean)
## $X1
## [1] 3
## 
## $X2
## [1] 8
# The same quartiles again, kept as a list with one named vector per column.
lapply(X = df, FUN = quantile)
## $X1
##   0%  25%  50%  75% 100% 
##    1    2    3    4    5 
## 
## $X2
##   0%  25%  50%  75% 100% 
##    6    7    8    9   10
#####################2. Robust Scaler
# Robust scaling: centre `x` on its median and divide by its interquartile
# range (75th percentile minus 25th percentile).
#
# Args:
#   x:     numeric vector to rescale.
#   na.rm: if TRUE, missing values are ignored when computing the median and
#          the quartiles; they stay NA in the result. With the default
#          (FALSE) a vector containing NA raises an error from quantile().
#
# Returns:
#   A vector of the same length as `x`. If the interquartile range is zero
#   (for example when all values are equal) the division yields NaN or
#   +/-Inf.
robust_scalar <- function(x, na.rm = FALSE) {
  # IQR() obtains both quartiles from a single quantile() call and returns an
  # unnamed value, so no "75%" name can leak into the result.
  (x - median(x, na.rm = na.rm)) / IQR(x, na.rm = na.rm)
}
# Robust-scale every column; vapply() returns a numeric matrix with one column
# per variable and one row per observation.
vapply(df, robust_scalar, FUN.VALUE = numeric(nrow(df)))
##        X1   X2
## [1,] -1.0 -1.0
## [2,] -0.5 -0.5
## [3,]  0.0  0.0
## [4,]  0.5  0.5
## [5,]  1.0  1.0
#####################3. Min-Max Normalization
# Min-max normalization: linearly rescale `x` so that its minimum maps to 0
# and its maximum maps to 1.
#
# Args:
#   x:     numeric vector to rescale.
#   na.rm: if TRUE, missing values are ignored when finding the minimum and
#          maximum; they stay NA in the result. With the default (FALSE) any
#          NA in `x` makes the whole result NA.
#
# Returns:
#   A vector of the same length as `x`. If all values are equal the range is
#   zero and the result is NaN.
norm_minmax <- function(x, na.rm = FALSE) {
  # range() yields c(min, max) in one pass instead of calling min() twice.
  rng <- range(x, na.rm = na.rm)
  (x - rng[1L]) / (rng[2L] - rng[1L])
}
# Min-max normalize every column; the result is a numeric matrix with one
# column per variable.
vapply(df, norm_minmax, FUN.VALUE = numeric(nrow(df)))
##        X1   X2
## [1,] 0.00 0.00
## [2,] 0.25 0.25
## [3,] 0.50 0.50
## [4,] 0.75 0.75
## [5,] 1.00 1.00
#####################4. Mean Normalization
# Mean normalization: centre `x` on its mean and divide by its range
# (maximum minus minimum).
#
# Args:
#   x:     numeric vector to rescale.
#   na.rm: if TRUE, missing values are ignored when computing the mean and the
#          range; they stay NA in the result. With the default (FALSE) any NA
#          in `x` makes the whole result NA.
#
# Returns:
#   A vector of the same length as `x`. If all values are equal the range is
#   zero and the result is NaN.
mean_norm_minmax <- function(x, na.rm = FALSE) {
  rng <- range(x, na.rm = na.rm)
  (x - mean(x, na.rm = na.rm)) / (rng[2L] - rng[1L])
}
# Mean-normalize every column; the result is a numeric matrix with one column
# per variable.
vapply(df, mean_norm_minmax, FUN.VALUE = numeric(nrow(df)))
##         X1    X2
## [1,] -0.50 -0.50
## [2,] -0.25 -0.25
## [3,]  0.00  0.00
## [4,]  0.25  0.25
## [5,]  0.50  0.50
# The column-wise results are combined into a matrix, not a data frame.
class(vapply(df, mean_norm_minmax, FUN.VALUE = numeric(nrow(df))))
## [1] "matrix" "array"
#####################5. Unit Length
# Unit-length scaling: divide `x` by its Euclidean (L2) norm so that the
# squared values of the result sum to 1.
#
# Args:
#   x:     numeric vector to rescale.
#   na.rm: if TRUE, missing values are left out of the norm; they stay NA in
#          the result. With the default (FALSE) any NA in `x` makes the whole
#          result NA.
#
# Returns:
#   A vector of the same length as `x`. An all-zero vector has norm zero and
#   yields NaN.
unit_length <- function(x, na.rm = FALSE) {
  l2_norm <- sqrt(sum(x^2, na.rm = na.rm))
  x / l2_norm
}
# Scale every column to unit Euclidean length; the result is a numeric matrix
# with one column per variable.
vapply(df, unit_length, FUN.VALUE = numeric(nrow(df)))
##             X1        X2
## [1,] 0.1348400 0.3302891
## [2,] 0.2696799 0.3853373
## [3,] 0.4045199 0.4403855
## [4,] 0.5393599 0.4954337
## [5,] 0.6741999 0.5504819
####################plot
library(ggplot2)
# Scatter plot of the raw columns.
ggplot(data = df, mapping = aes(x = X1, y = X2)) +
  geom_point(color = "darkgreen")

# The same scatter plot after scaling each column to unit length; only the
# axis scales change, the relative position of the points does not.
ggplot(
  data = as.data.frame(lapply(df, unit_length)),
  mapping = aes(x = X1, y = X2)
) +
  geom_point(color = "darkgreen")

##ref https://medium.com/swlh/data-normalisation-with-r-6ef1d1947970