Zero-Variance Variables in R

library(tidyverse)
library(caret)
# Build a 100-row demo data frame with four columns of differing variability:
#   a - integers drawn with replacement from 1..10
#   b - the running index 1..100 (every value unique)
#   c - 100 standard-normal draws
#   d - the constant 1 (zero variance)
# Columns a and c are random and no seed is set here, so they differ per run.
samp <- data.frame(
  a = sample(10, 100, replace = TRUE),
  b = seq_len(100),
  c = rnorm(100),
  d = rep(1, 100)
)

zero variance 추출

# Column positions that caret flags as (near-)zero variance.
samp %>%
  caret::nearZeroVar()
## [1] 4

zero variance 열 이름 출력

# Same check, but return the flagged column names instead of their positions.
# The argument is spelled out as `names` rather than relying on partial
# matching of `name`.
caret::nearZeroVar(samp, names = TRUE)
## [1] "d"

zero variance metrics 출력

# Return the per-column diagnostics table (freqRatio, percentUnique, zeroVar,
# nzv) instead of only the flagged columns.
caret::nearZeroVar(samp, saveMetrics = TRUE)
##   freqRatio percentUnique zeroVar   nzv
## a         1            10   FALSE FALSE
## b         1           100   FALSE FALSE
## c         1           100   FALSE FALSE
## d         0             1    TRUE  TRUE

Near Zero Variance 열 제거

# Drop the (near-)zero-variance columns and preview the result.
# Columns are removed by name: negative indexing with an empty index vector
# (`samp[, -integer(0)]`) would select zero columns, so the positional form
# silently discards every column whenever nothing is flagged.
nzv_cols <- caret::nearZeroVar(samp, names = TRUE)
samp %>%
  select(-all_of(nzv_cols)) %>%
  head()
##    a b          c
## 1  6 1 -1.1223224
## 2 10 2  1.3769141
## 3  3 3 -0.1141288
## 4  4 4 -0.7803554
## 5  3 5 -2.5206824
## 6  1 6  2.5261171
# Mean and standard deviation of every column, one row per statistic.
# Each column yields two values, so reframe() is used: returning more than one
# row per group from summarise() is deprecated since dplyr 1.1.0.
# Requires dplyr >= 1.1.0.
samp %>%
  reframe(across(everything(), ~ c(mean(.x), sd(.x)))) %>%
  `rownames<-`(c("Mean", "SD"))
##             a        b         c d
## Mean 5.820000 50.50000 0.1756161 1
## SD   2.924574 29.01149 1.0080762 0