# Keep only the numeric measurement columns of `pg`.
# `year` is converted to a factor first so that `where(is.numeric)` does not
# select it along with the measurements.
pg_numeric <- pg %>%
  mutate(year = as.factor(year)) %>%
  select(where(is.numeric))
# Estimate min-max ("range") rescaling parameters from the numeric columns.
# This only fits the transformation; it is applied later via predict().
normalized <- caret::preProcess(x = pg_numeric, method = "range")
# Print a summary of the fitted pre-processing steps.
normalized
## Created from 342 samples and 4 variables
##
## Pre-processing:
## - ignored (0)
## - re-scaling to [0, 1] (4)
# Apply the fitted rescaling to the same data it was estimated from.
pg_numeric_normalized <- predict(normalized, newdata = pg_numeric)
# Preview the first rows; rows with missing measurements remain NA (row 4).
head(pg_numeric_normalized)
## # A tibble: 6 x 4
## bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <dbl> <dbl> <dbl> <dbl>
## 1 0.255 0.667 0.153 0.292
## 2 0.269 0.512 0.237 0.306
## 3 0.298 0.583 0.390 0.153
## 4 NA NA NA NA
## 5 0.167 0.738 0.356 0.208
## 6 0.262 0.893 0.305 0.264
# Sanity check: after min-max rescaling every column should span exactly
# [0, 1]. range() returns two values per column, so the result has two rows
# (row 1 = minimum, row 2 = maximum). NAs are dropped before computing.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
# NOTE(review): summarise() returning more than one row per group is
# deprecated from dplyr 1.1.0 onwards; switch to reframe() once the project's
# minimum dplyr version allows it.
pg_numeric_normalized %>%
  summarise(across(everything(), ~ range(.x, na.rm = TRUE)))
## # A tibble: 2 x 4
## bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <dbl> <dbl> <dbl> <dbl>
## 1 0 0 0 0
## 2 1 1 1 1
# Boxplot of each rescaled variable. The data are reshaped to long format
# (one row per variable/value pair, NAs dropped) so the variable name can be
# mapped to the x axis. Dashed blue lines mark the [0, 1] bounds.
pg_numeric_normalized %>%
  pivot_longer(
    cols = everything(),
    names_to = "cols",
    values_to = "normalized",
    values_drop_na = TRUE
  ) %>%
  ggplot(mapping = aes(x = cols, y = normalized)) +
  geom_boxplot() +
  geom_hline(yintercept = c(0, 1), colour = "blue", linetype = 2)
# Estimate centering and scaling parameters from the numeric columns.
# As with the range transform, this only fits the transformation; it is
# applied later via predict().
standardize <- caret::preProcess(x = pg_numeric, method = c("center", "scale"))
# Print a summary of the fitted pre-processing steps.
standardize
## Created from 342 samples and 4 variables
##
## Pre-processing:
## - centered (4)
## - ignored (0)
## - scaled (4)
# Apply the fitted centering/scaling to the same data it was estimated from.
pg_numeric_standardized <- predict(standardize, newdata = pg_numeric)
# Preview the first rows; rows with missing measurements remain NA (row 4).
head(pg_numeric_standardized)
## # A tibble: 6 x 4
## bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <dbl> <dbl> <dbl> <dbl>
## 1 -0.883 0.784 -1.42 -0.563
## 2 -0.810 0.126 -1.06 -0.501
## 3 -0.663 0.430 -0.421 -1.19
## 4 NA NA NA NA
## 5 -1.32 1.09 -0.563 -0.937
## 6 -0.847 1.75 -0.776 -0.688
# Inspect the span of each standardized column. Unlike min-max rescaling,
# the values are not confined to a fixed interval. range() returns two values
# per column, so the result has two rows (row 1 = minimum, row 2 = maximum).
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
# NOTE(review): summarise() returning more than one row per group is
# deprecated from dplyr 1.1.0 onwards; switch to reframe() once the project's
# minimum dplyr version allows it.
pg_numeric_standardized %>%
  summarise(across(everything(), ~ range(.x, na.rm = TRUE)))
## # A tibble: 2 x 4
## bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <dbl> <dbl> <dbl> <dbl>
## 1 -2.17 -2.05 -2.06 -1.87
## 2 2.87 2.20 2.14 2.62
# Long-format copy of the standardized data (one row per variable/value pair,
# NAs dropped) with the standard deviation of each variable attached as `SD`.
# The SD is computed within each variable (`cols`); without the grouping it
# would be a single value pooled across all four variables, which says
# nothing about whether each individual column was scaled correctly.
# The data are ungrouped again so downstream code sees a plain tibble.
gg_pg <- pg_numeric_standardized %>%
  pivot_longer(
    cols = everything(),
    names_to = "cols",
    values_to = "standardized",
    values_drop_na = TRUE
  ) %>%
  group_by(cols) %>%
  mutate(SD = sd(standardized, na.rm = TRUE)) %>%
  ungroup()
# Boxplot of each standardized variable.
gg_pg %>%
  ggplot(aes(x = cols, y = standardized)) +
  geom_boxplot()
.EOF.