# Normalization vs Standardization

# Build the numeric-only table used by both scaling demos below.
# `year` is converted to a factor first so that `where(is.numeric)` skips it.
pg_numeric <- select(
  mutate(pg, year = as.factor(year)),
  where(is.numeric)
)

## Normalization with caret

# Fit a "range" pre-processing model on the numeric columns. The printed
# summary below reports it as re-scaling to [0, 1].
normalized <- caret::preProcess(pg_numeric, method = "range")
print(normalized)
## Created from 342 samples and 4 variables
## 
## Pre-processing:
##   - ignored (0)
##   - re-scaling to [0, 1] (4)
# Apply the fitted transformation to the same data and peek at the first rows.
pg_numeric_normalized <- predict(normalized, newdata = pg_numeric)
head(pg_numeric_normalized)
## # A tibble: 6 x 4
##   bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##            <dbl>         <dbl>             <dbl>       <dbl>
## 1          0.255         0.667             0.153       0.292
## 2          0.269         0.512             0.237       0.306
## 3          0.298         0.583             0.390       0.153
## 4         NA            NA                NA          NA    
## 5          0.167         0.738             0.356       0.208
## 6          0.262         0.893             0.305       0.264
# Sanity check: after range scaling every column should span [0, 1].
# `range()` returns c(min, max), so row 1 of the result holds the minima and
# row 2 the maxima. NAs are dropped explicitly; `TRUE` is spelled out because
# `T` is an ordinary variable that can be reassigned.
# NOTE: from dplyr 1.1.0 a multi-row `summarise()` is deprecated in favour of
# `reframe()`; switch when the project moves to that version.
pg_numeric_normalized %>%
  summarise(across(everything(), ~ range(.x, na.rm = TRUE)))
## # A tibble: 2 x 4
##   bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##            <dbl>         <dbl>             <dbl>       <dbl>
## 1              0             0                 0           0
## 2              1             1                 1           1
# Boxplot of every normalized variable side by side. The data are reshaped to
# long form (one row per non-missing value) and dashed blue reference lines
# are drawn at 0 and 1.
ggplot(
  data = pivot_longer(
    pg_numeric_normalized,
    cols = everything(),
    names_to = "cols",
    values_to = "normalized",
    values_drop_na = TRUE
  ),
  mapping = aes(x = cols, y = normalized)
) +
  geom_boxplot() +
  geom_hline(yintercept = c(0, 1), colour = "blue", linetype = 2)

## Standardization with caret

# Fit a centering + scaling pre-processing model on the numeric columns. The
# printed summary below reports all four variables as centered and scaled.
standardize <- caret::preProcess(pg_numeric, method = c("center", "scale"))
print(standardize)
## Created from 342 samples and 4 variables
## 
## Pre-processing:
##   - centered (4)
##   - ignored (0)
##   - scaled (4)
# Apply the fitted transformation to the same data and peek at the first rows.
pg_numeric_standardized <- predict(standardize, newdata = pg_numeric)
head(pg_numeric_standardized)
## # A tibble: 6 x 4
##   bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##            <dbl>         <dbl>             <dbl>       <dbl>
## 1         -0.883         0.784            -1.42       -0.563
## 2         -0.810         0.126            -1.06       -0.501
## 3         -0.663         0.430            -0.421      -1.19 
## 4         NA            NA                NA          NA    
## 5         -1.32          1.09             -0.563      -0.937
## 6         -0.847         1.75             -0.776      -0.688
# Inspect the spread of each standardized column. Unlike range scaling, the
# values are not confined to a fixed interval.
# `range()` returns c(min, max), so row 1 of the result holds the minima and
# row 2 the maxima. NAs are dropped explicitly; `TRUE` is spelled out because
# `T` is an ordinary variable that can be reassigned.
# NOTE: from dplyr 1.1.0 a multi-row `summarise()` is deprecated in favour of
# `reframe()`; switch when the project moves to that version.
pg_numeric_standardized %>%
  summarise(across(everything(), ~ range(.x, na.rm = TRUE)))
## # A tibble: 2 x 4
##   bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##            <dbl>         <dbl>             <dbl>       <dbl>
## 1          -2.17         -2.05             -2.06       -1.87
## 2           2.87          2.20              2.14        2.62
# Reshape the standardized data to long form (one row per non-missing value)
# and attach each variable's standard deviation as a check on the scaling.
# The SD is computed per variable (grouped by `cols`); an ungrouped `sd()`
# would pool all four variables into a single number and would not show
# whether each one was scaled to unit SD. The grouping is removed afterwards
# so `gg_pg` behaves like a plain tibble downstream.
gg_pg <- pg_numeric_standardized %>%
  pivot_longer(
    everything(),
    names_to = "cols",
    values_to = "standardized",
    values_drop_na = TRUE
  ) %>%
  group_by(cols) %>%
  mutate(SD = sd(standardized, na.rm = TRUE)) %>%
  ungroup()

# Boxplot of every standardized variable side by side.
gg_pg %>%
  ggplot(aes(x = cols, y = standardized)) +
  geom_boxplot()

.EOF.