#' Shannon entropy (in bits) of a categorical vector
#'
#' Computes `-sum(p * log2(p))` over the relative frequencies of the distinct
#' values found in `x`.
#'
#' @param x A vector of category labels (character, factor, logical, ...).
#'   `NA` entries are ignored instead of turning the whole result into `NA`.
#' @return A single numeric value. Returns 0 when `x` is empty or contains
#'   only one distinct value.
info_entropy <- function(x) {
  # table() counts every distinct value and leaves NA out by default.
  counts <- table(x)
  # A factor input may carry unused levels; a zero count would give
  # 0 * log2(0) = NaN, so keep only the values that actually occur.
  counts <- counts[counts > 0]
  probs <- as.numeric(counts) / sum(counts)
  sum(-probs * log2(probs))
}
# Example 1: two distinct colours (2 blue, 3 red).
x <- c(
  "red", "blue", "blue", "red", "red"
)
info_entropy(x)
## [1] 0.9709506
# Example 2: four distinct colours, so the entropy is higher.
y <- c(
  "white", "blue", "orange", "red", "red"
)
info_entropy(y)
## [1] 1.921928
# Load the survey data and compute the entropy of the response column.
# NOTE(review): setwd() changes the working directory for the whole session.
# It is kept because code outside this section may depend on it; consider
# passing a full path to read.csv() instead.
setwd("~/ML/ml_with_r/data")
# Since R 4.0.0 read.csv() keeps text columns as character by default.
# Request factors explicitly so code that relies on factor levels behaves
# the same on every R version.
skin <- read.csv("skin.csv", stringsAsFactors = TRUE)
# Drop the first column of the file; it is not part of the table shown below.
skin <- skin[-1]
head(skin)
## 성별 나이 직장여부 결혼여부 차량보유여부 쿠폰반응여부
## 1 남 30대 NO YES NO NO
## 2 여 20대 YES YES YES NO
## 3 여 20대 YES YES NO NO
## 4 여 40대 NO NO NO NO
## 5 여 30대 NO YES NO NO
## 6 여 30대 NO NO YES NO
# Baseline: entropy of the 6th column (the response) before any split.
first_entropy <- info_entropy(skin[[6]])
first_entropy
## [1] 0.9709506
# For each predictor (columns 1-5), group the response (column 6) by the
# predictor's values and add up the entropy of every group. A smaller total
# means the groups are purer with respect to the response.
# NOTE(review): the group entropies are summed unweighted, whereas the usual
# conditional entropy weights each group by its share of the rows. Confirm
# which one is intended before relying on the absolute values.
for (col_name in colnames(skin)[1:5]) {
  # split() groups by the column's values for factor and character columns
  # alike (levels() returns NULL for a character column, which would skip
  # every group), and rows whose predictor value is NA are left out.
  response_by_level <- split(skin[[6]], skin[[col_name]])
  sum_entropy <- sum(vapply(response_by_level, info_entropy, numeric(1)))
  cat(col_name, "---->", sum_entropy, "\n")
}
## 성별 ----> 1.641098
## 나이 ----> 2.796506
## 직장여부 ----> 1.887994
## 결혼여부 ----> 0.9709506
## 차량보유여부 ----> 1.932395
library(rpart)
# Fit a classification tree for the response using every other column as a
# predictor. minsplit = 2 lets rpart attempt a split on nodes holding as few
# as two observations.
tree <- rpart(
  쿠폰반응여부 ~ .,
  data = skin,
  control = rpart.control(minsplit = 2)
)
# Draw the tree, then add the split labels. TRUE is spelled out because T is
# an ordinary variable that can be reassigned.
plot(tree, compress = TRUE, uniform = TRUE, margin = 0.1)
text(tree, use.n = TRUE, col = "blue")

tree
## n= 30
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 30 12 NO (0.6000000 0.4000000)
## 2) 결혼여부=NO 10 0 NO (1.0000000 0.0000000) *
## 3) 결혼여부=YES 20 8 YES (0.4000000 0.6000000)
## 6) 나이=20대,30대 16 8 NO (0.5000000 0.5000000)
## 12) 성별=남 5 1 NO (0.8000000 0.2000000)
## 24) 직장여부=NO 3 0 NO (1.0000000 0.0000000) *
## 25) 직장여부=YES 2 1 NO (0.5000000 0.5000000)
## 50) 나이=20대 1 0 NO (1.0000000 0.0000000) *
## 51) 나이=30대 1 0 YES (0.0000000 1.0000000) *
## 13) 성별=여 11 4 YES (0.3636364 0.6363636)
## 26) 나이=20대 5 2 NO (0.6000000 0.4000000)
## 52) 직장여부=YES 2 0 NO (1.0000000 0.0000000) *
## 53) 직장여부=NO 3 1 YES (0.3333333 0.6666667)
## 106) 차량보유여부=NO 1 0 NO (1.0000000 0.0000000) *
## 107) 차량보유여부=YES 2 0 YES (0.0000000 1.0000000) *
## 27) 나이=30대 6 1 YES (0.1666667 0.8333333)
## 54) 직장여부=NO 2 1 NO (0.5000000 0.5000000)
## 108) 차량보유여부=NO 1 0 NO (1.0000000 0.0000000) *
## 109) 차량보유여부=YES 1 0 YES (0.0000000 1.0000000) *
## 55) 직장여부=YES 4 0 YES (0.0000000 1.0000000) *
## 7) 나이=40대 4 0 YES (0.0000000 1.0000000) *