#' Shannon entropy (in bits) of the labels in a vector
#'
#' Each distinct non-missing value of `x` is treated as one class; the result
#' is `-sum(p * log2(p))` over the observed class proportions `p`.
#'
#' @param x A vector of class labels (e.g. character or factor).
#' @param na.rm If `FALSE` (the default), any missing value in `x` makes the
#'   result `NA` as long as at least one non-missing label is present. If
#'   `TRUE`, missing values are dropped and the proportions are computed over
#'   the remaining labels.
#' @return A single numeric value. An input with no non-missing labels
#'   (including a zero-length input) yields 0.
info_entropy <- function(x, na.rm = FALSE) {
  # factor() drops NA and unused levels, so only observed classes are counted.
  counts <- as.vector(table(factor(x)))
  if (length(counts) == 0L) {
    return(0)
  }
  if (!na.rm && anyNA(x)) {
    return(NA_real_)
  }
  # Without NAs sum(counts) equals length(x); with na.rm = TRUE it is the
  # number of non-missing labels.
  prob <- counts / sum(counts)
  -sum(prob * log2(prob))
}
# Example 1: five labels in two classes ("red" x3, "blue" x2).
x <- c("red","blue","blue","red","red")
info_entropy(x)
## [1] 0.9709506
# Example 2: five labels in four classes (only "red" is repeated).
y <- c("white","blue","orange","red","red")
info_entropy(y)
## [1] 1.921928
# Load the coupon-response data set and drop its first column.
# NOTE(review): setwd() changes the working directory for the rest of the
# session; it is kept because later code may rely on it, but building the
# path with file.path() would be preferable.
setwd("~/ML/ml_with_r/data")
# stringsAsFactors = TRUE is required on R >= 4.0, where read.csv() keeps text
# columns as character: the per-attribute loop further down iterates over
# levels() of each column, which is NULL for character vectors.
skin <- read.csv("skin.csv", stringsAsFactors = TRUE)
skin <- skin[-1]
head(skin)
##   성별 나이 직장여부 결혼여부 차량보유여부 쿠폰반응여부
## 1   남 30대       NO      YES           NO           NO
## 2   여 20대      YES      YES          YES           NO
## 3   여 20대      YES      YES           NO           NO
## 4   여 40대       NO       NO           NO           NO
## 5   여 30대       NO      YES           NO           NO
## 6   여 30대       NO       NO          YES           NO
# Entropy of the 6th column of `skin` over all rows, i.e. before any split.
first_entropy <- info_entropy(skin[[6]])
first_entropy
## [1] 0.9709506
# For each of the first five columns, split column 6 by the values of that
# column and print the sum of the entropies of the resulting subsets.
# NOTE(review): the subset entropies are added up without weighting by subset
# size, so this is not the weighted conditional entropy normally used for
# information gain -- confirm this is intended before ranking attributes.
for (str in colnames(skin)[1:5]) {
  # factor() makes this work for both factor and character columns; levels()
  # alone returns NULL for character columns (the read.csv() default since
  # R 4.0), in which case the inner loop would never run.
  factors <- levels(factor(skin[[str]]))
  sum_entropy <- 0

  for (str2 in factors) {
    # Column 6 restricted to the rows where the current column equals str2.
    test_x <- skin[skin[[str]] == str2, ][6]
    sum_entropy <- sum_entropy + info_entropy(test_x[, 1])
  }
  cat(str, "---->", sum_entropy, "\n")
}
## 성별 ----> 1.641098 
## 나이 ----> 2.796506 
## 직장여부 ----> 1.887994 
## 결혼여부 ----> 0.9709506 
## 차량보유여부 ----> 1.932395
library(rpart)
# Fit a tree predicting 쿠폰반응여부 from all other columns of `skin`.
# minsplit = 2 lets rpart attempt a split on any node with at least two rows.
tree <- rpart(
  쿠폰반응여부 ~ .,
  data = skin,
  control = rpart.control(minsplit = 2)
)
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
plot(tree, compress = TRUE, uniform = TRUE, margin = 0.1)
text(tree, use.n = TRUE, col = "blue")

# Print the fitted tree's node listing.
tree
## n= 30 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##   1) root 30 12 NO (0.6000000 0.4000000)  
##     2) 결혼여부=NO 10  0 NO (1.0000000 0.0000000) *
##     3) 결혼여부=YES 20  8 YES (0.4000000 0.6000000)  
##       6) 나이=20대,30대 16  8 NO (0.5000000 0.5000000)  
##        12) 성별=남 5  1 NO (0.8000000 0.2000000)  
##          24) 직장여부=NO 3  0 NO (1.0000000 0.0000000) *
##          25) 직장여부=YES 2  1 NO (0.5000000 0.5000000)  
##            50) 나이=20대 1  0 NO (1.0000000 0.0000000) *
##            51) 나이=30대 1  0 YES (0.0000000 1.0000000) *
##        13) 성별=여 11  4 YES (0.3636364 0.6363636)  
##          26) 나이=20대 5  2 NO (0.6000000 0.4000000)  
##            52) 직장여부=YES 2  0 NO (1.0000000 0.0000000) *
##            53) 직장여부=NO 3  1 YES (0.3333333 0.6666667)  
##             106) 차량보유여부=NO 1  0 NO (1.0000000 0.0000000) *
##             107) 차량보유여부=YES 2  0 YES (0.0000000 1.0000000) *
##          27) 나이=30대 6  1 YES (0.1666667 0.8333333)  
##            54) 직장여부=NO 2  1 NO (0.5000000 0.5000000)  
##             108) 차량보유여부=NO 1  0 NO (1.0000000 0.0000000) *
##             109) 차량보유여부=YES 1  0 YES (0.0000000 1.0000000) *
##            55) 직장여부=YES 4  0 YES (0.0000000 1.0000000) *
##       7) 나이=40대 4  0 YES (0.0000000 1.0000000) *