library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
## 필요한 패키지를 로딩중입니다: ggplot2
## 필요한 패키지를 로딩중입니다: lattice
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## 다음의 패키지를 부착합니다: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
# Load the customer feature tables and the training labels; the CSV files are
# read with the EUC-KR encoding.
x_train <- read.csv("X_train.csv", fileEncoding = "euc-kr")
x_test <- read.csv("X_test.csv", fileEncoding = "euc-kr")
y_train <- read.csv("y_train.csv", fileEncoding = "euc-kr")

# Attach the labels to the training features by customer id, and tag every row
# with the split it came from so the stacked frame can be separated again.
train <- x_train %>%
  left_join(y_train, by = "cust_id") %>%
  mutate(index = "train")
test <- mutate(x_test, index = "test")

# Stack both splits into a single frame and inspect its structure.
data <- bind_rows(train, test)
glimpse(data)
## Rows: 5,982
## Columns: 12
## $ cust_id        <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
## $ 총구매액       <dbl> 68282840, 2136000, 3197000, 16077620, 29050000, 1137900…
## $ 최대구매액     <int> 11264000, 2136000, 1639000, 4935000, 24000000, 9552000,…
## $ 환불금액       <int> 6860000, 300000, NA, NA, NA, 462000, 4582000, 29524000,…
## $ 주구매상품     <chr> "기타", "스포츠", "남성 캐주얼", "기타", "보석", "디자…
## $ 주구매지점     <chr> "강남점", "잠실점", "관악점", "광주점", "본  점", "일산…
## $ 내점일수       <int> 19, 2, 2, 18, 2, 3, 5, 63, 18, 1, 25, 3, 2, 27, 84, 152…
## $ 내점당구매건수 <dbl> 3.894737, 1.500000, 2.000000, 2.444444, 1.500000, 1.666…
## $ 주말방문비율   <dbl> 0.52702703, 0.00000000, 0.00000000, 0.31818182, 0.00000…
## $ 구매주기       <int> 17, 1, 1, 16, 85, 42, 42, 5, 15, 0, 13, 89, 16, 10, 4, …
## $ gender         <int> 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0…
## $ index          <chr> "train", "train", "train", "train", "train", "train", "…
# Count the missing values in each column of the stacked train/test frame.
data %>% is.na() %>% colSums()
##        cust_id       총구매액     최대구매액       환불금액     주구매상품 
##              0              0              0           3906              0 
##     주구매지점       내점일수 내점당구매건수   주말방문비율       구매주기 
##              0              0              0              0              0 
##         gender          index 
##           2482              0
# Read the travel-insurance data and hold out 25% of the rows as a test set.
# The seed is fixed right before the split so the partition is reproducible.
# NOTE(review): TravelInsurance is still an integer column here, so
# createDataPartition presumably balances on quantile groups rather than on
# class labels -- confirm this is the intended stratification.
df <- read.csv("travel_data.csv")
set.seed(1357)
train_list <- createDataPartition(df$TravelInsurance, p = 0.75, list = FALSE)
df_train <- df[train_list, ]
df_test <- df[-train_list, ]
df_train %>% glimpse()
## Rows: 1,491
## Columns: 10
## $ INDEX               <int> 2, 3, 4, 7, 8, 9, 10, 11, 13, 16, 17, 18, 19, 20, …
## $ Age                 <int> 34, 28, 28, 31, 28, 33, 31, 26, 31, 28, 28, 29, 34…
## $ Employment.Type     <chr> "Private Sector/Self Employed", "Private Sector/Se…
## $ GraduateOrNot       <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "…
## $ AnnualIncome        <int> 500000, 700000, 700000, 1350000, 1450000, 800000, …
## $ FamilyMembers       <int> 4, 3, 8, 3, 6, 3, 9, 5, 6, 4, 7, 5, 2, 6, 3, 4, 9,…
## $ ChronicDiseases     <int> 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,…
## $ FrequentFlyer       <chr> "No", "No", "Yes", "Yes", "Yes", "Yes", "No", "Yes…
## $ EverTravelledAbroad <chr> "No", "No", "No", "Yes", "Yes", "No", "No", "Yes",…
## $ TravelInsurance     <int> 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,…
# Inspect the held-out rows; they should have the same columns as df_train.
df_test %>% glimpse()
## Rows: 496
## Columns: 10
## $ INDEX               <int> 0, 1, 5, 6, 12, 14, 15, 27, 33, 37, 38, 39, 43, 46…
## $ Age                 <int> 31, 31, 25, 31, 32, 31, 34, 28, 32, 34, 34, 33, 28…
## $ Employment.Type     <chr> "Government Sector", "Private Sector/Self Employed…
## $ GraduateOrNot       <chr> "Yes", "Yes", "No", "Yes", "Yes", "Yes", "Yes", "Y…
## $ AnnualIncome        <int> 400000, 1250000, 1150000, 1300000, 850000, 400000,…
## $ FamilyMembers       <int> 6, 7, 4, 4, 6, 3, 7, 2, 3, 4, 2, 5, 4, 4, 3, 3, 9,…
## $ ChronicDiseases     <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,…
## $ FrequentFlyer       <chr> "No", "No", "No", "No", "No", "No", "No", "Yes", "…
## $ EverTravelledAbroad <chr> "No", "No", "No", "No", "No", "No", "No", "No", "N…
## $ TravelInsurance     <int> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1,…
# Per-column count of missing values in the training partition.
df_train %>% is.na() %>% colSums()
##               INDEX                 Age     Employment.Type       GraduateOrNot 
##                   0                   0                   0                   0 
##        AnnualIncome       FamilyMembers     ChronicDiseases       FrequentFlyer 
##                   0                   0                   0                   0 
## EverTravelledAbroad     TravelInsurance 
##                   0                   0
# Per-column count of missing values in the test partition.
df_test %>% is.na() %>% colSums()
##               INDEX                 Age     Employment.Type       GraduateOrNot 
##                   0                   0                   0                   0 
##        AnnualIncome       FamilyMembers     ChronicDiseases       FrequentFlyer 
##                   0                   0                   0                   0 
## EverTravelledAbroad     TravelInsurance 
##                   0                   0
# Tag each partition with its origin and turn the target into a factor, so the
# two partitions can be preprocessed together and separated again afterwards.
df_train <- df_train %>%
  mutate(index = "train", TravelInsurance = as.factor(TravelInsurance))
df_test <- df_test %>%
  mutate(index = "test", TravelInsurance = as.factor(TravelInsurance))

# Stack the partitions and check that the combined frame has no missing values.
data <- bind_rows(df_train, df_test)
data %>% is.na() %>% colSums()
##               INDEX                 Age     Employment.Type       GraduateOrNot 
##                   0                   0                   0                   0 
##        AnnualIncome       FamilyMembers     ChronicDiseases       FrequentFlyer 
##                   0                   0                   0                   0 
## EverTravelledAbroad     TravelInsurance               index 
##                   0                   0                   0
# Recode the 0/1 target into labelled classes and convert the Yes/No columns
# to factors.
#
# The target levels are spelled out explicitly instead of being left to
# as.factor(), whose level order follows the locale's sort order. caret's
# twoClassSummary treats the FIRST level as the event of interest, and the
# prediction step below relies on the first probability column being '가입',
# so the positive class must not depend on how the locale sorts the labels.
# The comparison uses "0" so it works whether the column is still an integer
# or has already been converted to a factor.
data <- data %>%
  mutate(
    TravelInsurance = factor(
      ifelse(TravelInsurance == "0", '미가입', '가입'),
      levels = c('가입', '미가입')
    ),
    GraduateOrNot = as.factor(GraduateOrNot),
    FrequentFlyer = as.factor(FrequentFlyer),
    EverTravelledAbroad = as.factor(EverTravelledAbroad)
  )
# NOTE(review): Employment.Type is left as a character column, unlike the
# other categorical columns; the formula interface used for modelling handles
# it, but confirm the inconsistency is intentional.

# Split the stacked frame back into its partitions and drop the helper column.
train <- data %>% filter(index == 'train') %>% select(-index)
test <- data %>% filter(index == 'test') %>% select(-index)

summary(train)
##      INDEX             Age        Employment.Type    GraduateOrNot
##  Min.   :   2.0   Min.   :25.00   Length:1491        No : 220     
##  1st Qu.: 500.5   1st Qu.:28.00   Class :character   Yes:1271     
##  Median :1016.0   Median :29.00   Mode  :character                
##  Mean   :1002.9   Mean   :29.62                                   
##  3rd Qu.:1502.5   3rd Qu.:32.00                                   
##  Max.   :1985.0   Max.   :35.00                                   
##   AnnualIncome     FamilyMembers   ChronicDiseases  FrequentFlyer
##  Min.   : 300000   Min.   :2.000   Min.   :0.0000   No :1174     
##  1st Qu.: 600000   1st Qu.:4.000   1st Qu.:0.0000   Yes: 317     
##  Median : 900000   Median :5.000   Median :0.0000                
##  Mean   : 930550   Mean   :4.728   Mean   :0.2763                
##  3rd Qu.:1250000   3rd Qu.:6.000   3rd Qu.:1.0000                
##  Max.   :1800000   Max.   :9.000   Max.   :1.0000                
##  EverTravelledAbroad TravelInsurance
##  No :1212            가입  :530     
##  Yes: 279            미가입:961     
##                                     
##                                     
##                                     
## 
# Check the column types of the training split after the factor conversions.
train %>% glimpse()
## Rows: 1,491
## Columns: 10
## $ INDEX               <int> 2, 3, 4, 7, 8, 9, 10, 11, 13, 16, 17, 18, 19, 20, …
## $ Age                 <int> 34, 28, 28, 31, 28, 33, 31, 26, 31, 28, 28, 29, 34…
## $ Employment.Type     <chr> "Private Sector/Self Employed", "Private Sector/Se…
## $ GraduateOrNot       <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, …
## $ AnnualIncome        <int> 500000, 700000, 700000, 1350000, 1450000, 800000, …
## $ FamilyMembers       <int> 4, 3, 8, 3, 6, 3, 9, 5, 6, 4, 7, 5, 2, 6, 3, 4, 9,…
## $ ChronicDiseases     <int> 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,…
## $ FrequentFlyer       <fct> No, No, Yes, Yes, Yes, Yes, No, Yes, Yes, No, No, …
## $ EverTravelledAbroad <fct> No, No, No, Yes, Yes, No, No, Yes, Yes, No, No, No…
## $ TravelInsurance     <fct> 가입, 미가입, 미가입, 가입, 가입, 미가입, 미가입, …
# Check the column types of the test split after the factor conversions.
test %>% glimpse()
## Rows: 496
## Columns: 10
## $ INDEX               <int> 0, 1, 5, 6, 12, 14, 15, 27, 33, 37, 38, 39, 43, 46…
## $ Age                 <int> 31, 31, 25, 31, 32, 31, 34, 28, 32, 34, 34, 33, 28…
## $ Employment.Type     <chr> "Government Sector", "Private Sector/Self Employed…
## $ GraduateOrNot       <fct> Yes, Yes, No, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Y…
## $ AnnualIncome        <int> 400000, 1250000, 1150000, 1300000, 850000, 400000,…
## $ FamilyMembers       <int> 6, 7, 4, 4, 6, 3, 7, 2, 3, 4, 2, 5, 4, 4, 3, 3, 9,…
## $ ChronicDiseases     <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,…
## $ FrequentFlyer       <fct> No, No, No, No, No, No, No, Yes, No, No, No, Yes, …
## $ EverTravelledAbroad <fct> No, No, No, No, No, No, No, No, No, No, No, Yes, N…
## $ TravelInsurance     <fct> 미가입, 미가입, 미가입, 미가입, 가입, 미가입, 미가…
# Confirm the rebuilt training split has no missing values before fitting.
train %>% is.na() %>% colSums()
##               INDEX                 Age     Employment.Type       GraduateOrNot 
##                   0                   0                   0                   0 
##        AnnualIncome       FamilyMembers     ChronicDiseases       FrequentFlyer 
##                   0                   0                   0                   0 
## EverTravelledAbroad     TravelInsurance 
##                   0                   0
# Confirm the rebuilt test split has no missing values before predicting.
test %>% is.na() %>% colSums()
##               INDEX                 Age     Employment.Type       GraduateOrNot 
##                   0                   0                   0                   0 
##        AnnualIncome       FamilyMembers     ChronicDiseases       FrequentFlyer 
##                   0                   0                   0                   0 
## EverTravelledAbroad     TravelInsurance 
##                   0                   0
# Fit a k-nearest-neighbours classifier for TravelInsurance on all remaining
# columns. Predictors are scaled and centred, resampling is cross-validation,
# and the tuning value is chosen by the ROC metric; class probabilities are
# enabled because twoClassSummary needs them.
# NOTE(review): the `~ .` formula also passes the INDEX row identifier to kNN
# as a feature (the printed model reports 9 predictors) -- consider excluding
# it; doing so will change the results recorded below.
model <- train(
  TravelInsurance ~ .,
  data = train,
  method = "knn",
  preProcess = c("scale", "center"),
  metric = "ROC",
  trControl = trainControl(
    method = "cv",
    classProbs = TRUE,
    summaryFunction = twoClassSummary
  )
)
model
## k-Nearest Neighbors 
## 
## 1491 samples
##    9 predictor
##    2 classes: '가입', '미가입' 
## 
## Pre-processing: scaled (9), centered (9) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 1342, 1342, 1342, 1342, 1342, 1342, ... 
## Resampling results across tuning parameters:
## 
##   k  ROC        Sens       Spec     
##   5  0.7312324  0.5264151  0.8876503
##   7  0.7470022  0.5264151  0.9094931
##   9  0.7524415  0.5226415  0.9209729
## 
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was k = 9.
# Predict class probabilities for the test split (one column per class), then
# print the confusion matrix caret accumulated during cross-validation.
result <- predict(model, newdata = test, type = "prob")
confusionMatrix(model)
## Cross-Validated (10 fold) Confusion Matrix 
## 
## (entries are percentual average cell counts across resamples)
##  
##           Reference
## Prediction 가입 미가입
##     가입   18.6    5.1
##     미가입 17.0   59.4
##                             
##  Accuracy (average) : 0.7793
# Expose the predicted probability of the '가입' class as y_pred. The column is
# located by its class label rather than by position, so the right probability
# is picked even if the factor level order ever changes.
names(result)[names(result) == "가입"] <- "y_pred"

# Put the prediction next to the row identifier and the true label.
# Note: this reuses the name `df`, replacing the raw frame that was read from
# travel_data.csv earlier in the script.
df <- bind_cols(test, result) %>%
  select(INDEX, y_pred, TravelInsurance)
df
##     INDEX    y_pred TravelInsurance
## 1       0 0.2222222          미가입
## 2       1 0.1111111          미가입
## 3       5 0.2222222          미가입
## 4       6 0.0000000          미가입
## 5      12 0.3333333            가입
## 6      14 0.1111111          미가입
## 7      15 0.6666667          미가입
## 8      27 0.2222222          미가입
## 9      33 0.1111111          미가입
## 10     37 0.5555556          미가입
## 11     38 0.1111111          미가입
## 12     39 0.8888889            가입
## 13     43 0.1111111            가입
## 14     46 0.8888889            가입
## 15     48 0.2222222          미가입
## 16     56 0.2222222            가입
## 17     64 0.7777778            가입
## 18     65 0.2222222          미가입
## 19     72 0.2222222          미가입
## 20     74 0.1111111          미가입
## 21     83 0.5555556            가입
## 22     85 0.7777778            가입
## 23     92 0.1111111          미가입
## 24     94 0.2222222          미가입
## 25     97 0.3333333          미가입
## 26     99 0.2222222          미가입
## 27    101 0.7777778            가입
## 28    105 0.2222222          미가입
## 29    106 0.3333333          미가입
## 30    110 0.0000000          미가입
## 31    111 0.0000000          미가입
## 32    112 0.1111111          미가입
## 33    125 0.2222222            가입
## 34    127 0.1111111          미가입
## 35    128 0.6666667            가입
## 36    133 0.8888889            가입
## 37    135 0.3333333            가입
## 38    140 0.1111111          미가입
## 39    154 0.0000000          미가입
## 40    155 0.4444444            가입
## 41    158 0.1111111            가입
## 42    161 0.5555556            가입
## 43    162 0.5555556            가입
## 44    171 0.3333333          미가입
## 45    176 0.5555556            가입
## 46    178 0.1111111          미가입
## 47    183 0.5555556          미가입
## 48    184 0.2222222          미가입
## 49    187 0.3333333          미가입
## 50    188 0.3333333          미가입
## 51    195 0.8888889            가입
## 52    198 0.4444444            가입
## 53    200 0.1111111            가입
## 54    210 0.3333333          미가입
## 55    212 0.3333333          미가입
## 56    213 0.8888889            가입
## 57    222 0.2222222          미가입
## 58    223 0.1111111          미가입
## 59    224 0.1111111          미가입
## 60    233 0.4444444          미가입
## 61    234 0.2222222          미가입
## 62    238 0.2222222          미가입
## 63    242 0.1111111          미가입
## 64    243 0.2222222          미가입
## 65    249 0.1111111          미가입
## 66    250 0.5555556            가입
## 67    257 0.4444444            가입
## 68    266 0.0000000          미가입
## 69    267 0.1111111            가입
## 70    269 0.1111111          미가입
## 71    270 0.6666667            가입
## 72    272 0.2222222          미가입
## 73    273 0.4444444            가입
## 74    283 0.0000000          미가입
## 75    286 0.2222222          미가입
## 76    298 0.8888889            가입
## 77    300 0.5555556            가입
## 78    301 0.1111111          미가입
## 79    306 0.3333333            가입
## 80    309 0.0000000            가입
## 81    312 0.5555556          미가입
## 82    320 0.4444444          미가입
## 83    321 0.2222222          미가입
## 84    323 0.6666667            가입
## 85    324 0.1111111          미가입
## 86    334 0.2222222            가입
## 87    337 0.1111111          미가입
## 88    339 0.1111111          미가입
## 89    343 0.2222222          미가입
## 90    347 0.3333333          미가입
## 91    352 0.6666667            가입
## 92    356 0.3333333          미가입
## 93    359 0.2222222          미가입
## 94    361 0.6666667            가입
## 95    362 0.2222222          미가입
## 96    365 0.7777778            가입
## 97    366 0.2222222          미가입
## 98    369 0.6666667          미가입
## 99    372 0.2222222          미가입
## 100   378 0.8888889            가입
## 101   392 0.3333333            가입
## 102   394 0.0000000            가입
## 103   403 0.2222222          미가입
## 104   404 0.4444444            가입
## 105   414 0.7777778            가입
## 106   422 0.8888889            가입
## 107   423 1.0000000            가입
## 108   425 0.2222222          미가입
## 109   428 0.2222222          미가입
## 110   429 0.1111111          미가입
## 111   430 0.2222222            가입
## 112   431 0.2222222          미가입
## 113   443 0.2222222            가입
## 114   449 0.1111111            가입
## 115   456 0.8888889            가입
## 116   463 0.1111111          미가입
## 117   464 1.0000000            가입
## 118   468 0.2222222          미가입
## 119   469 0.8888889            가입
## 120   470 0.2222222          미가입
## 121   472 0.0000000          미가입
## 122   473 0.7777778            가입
## 123   479 0.3333333          미가입
## 124   482 0.6666667          미가입
## 125   484 0.3333333            가입
## 126   486 0.3333333          미가입
## 127   490 0.2222222          미가입
## 128   499 0.0000000          미가입
## 129   505 0.2222222          미가입
## 130   512 0.1111111          미가입
## 131   515 0.1111111          미가입
## 132   518 0.8888889            가입
## 133   528 0.2222222          미가입
## 134   529 0.1111111          미가입
## 135   530 0.1111111          미가입
## 136   532 0.2222222            가입
## 137   533 0.1111111          미가입
## 138   534 0.0000000            가입
## 139   536 0.1111111          미가입
## 140   538 0.3333333          미가입
## 141   547 0.7777778            가입
## 142   549 0.0000000          미가입
## 143   550 0.3333333          미가입
## 144   551 0.7777778            가입
## 145   553 1.0000000          미가입
## 146   554 0.3333333          미가입
## 147   559 0.1111111            가입
## 148   561 0.3333333          미가입
## 149   562 0.2222222          미가입
## 150   563 0.2222222            가입
## 151   565 0.1111111          미가입
## 152   570 0.3333333          미가입
## 153   584 0.1111111          미가입
## 154   586 0.0000000          미가입
## 155   593 0.3333333          미가입
## 156   594 0.5555556          미가입
## 157   598 0.6666667            가입
## 158   600 0.8888889            가입
## 159   606 0.2222222          미가입
## 160   610 0.4444444          미가입
## 161   611 0.2222222          미가입
## 162   614 0.3333333          미가입
## 163   615 0.3333333          미가입
## 164   616 0.0000000          미가입
## 165   617 0.2222222            가입
## 166   619 0.2222222            가입
## 167   623 0.1111111          미가입
## 168   625 0.2222222          미가입
## 169   628 0.2222222          미가입
## 170   633 0.2222222          미가입
## 171   642 0.0000000          미가입
## 172   654 0.3333333          미가입
## 173   655 0.2222222          미가입
## 174   659 0.3333333          미가입
## 175   662 0.2222222          미가입
## 176   667 0.7777778          미가입
## 177   675 0.1111111          미가입
## 178   676 0.0000000          미가입
## 179   677 0.1111111          미가입
## 180   681 0.1111111          미가입
## 181   682 0.8888889            가입
## 182   688 0.2222222          미가입
## 183   689 0.0000000          미가입
## 184   690 0.7777778          미가입
## 185   698 0.8888889            가입
## 186   699 0.3333333          미가입
## 187   703 0.0000000          미가입
## 188   708 0.2222222          미가입
## 189   713 0.2222222            가입
## 190   717 0.2222222          미가입
## 191   719 0.0000000          미가입
## 192   724 0.3333333          미가입
## 193   731 0.2222222          미가입
## 194   738 0.1111111          미가입
## 195   741 0.5555556            가입
## 196   742 0.4444444            가입
## 197   744 0.4444444          미가입
## 198   746 0.3333333          미가입
## 199   749 0.6666667            가입
## 200   750 0.2222222          미가입
## 201   752 0.2222222            가입
## 202   754 0.1111111          미가입
## 203   756 0.0000000          미가입
## 204   758 0.7777778            가입
## 205   759 0.5555556            가입
## 206   761 1.0000000            가입
## 207   771 0.3333333            가입
## 208   775 0.1111111          미가입
## 209   778 0.1111111          미가입
## 210   779 1.0000000            가입
## 211   781 0.2222222            가입
## 212   784 0.8888889            가입
## 213   791 0.3333333            가입
## 214   795 0.1111111          미가입
## 215   801 0.1111111            가입
## 216   804 0.3333333          미가입
## 217   805 0.2222222          미가입
## 218   807 0.3333333            가입
## 219   819 0.2222222          미가입
## 220   822 0.2222222          미가입
## 221   823 0.2222222          미가입
## 222   830 0.5555556            가입
## 223   833 0.7777778            가입
## 224   837 0.1111111          미가입
## 225   853 0.4444444          미가입
## 226   856 0.6666667            가입
## 227   859 0.3333333          미가입
## 228   863 0.2222222          미가입
## 229   865 0.8888889            가입
## 230   876 0.1111111          미가입
## 231   879 0.3333333            가입
## 232   885 0.0000000          미가입
## 233   886 0.8888889            가입
## 234   889 0.3333333          미가입
## 235   892 0.3333333          미가입
## 236   893 0.1111111          미가입
## 237   894 0.3333333            가입
## 238   896 0.0000000          미가입
## 239   902 0.6666667            가입
## 240   903 0.3333333          미가입
## 241   904 0.0000000          미가입
## 242   909 0.7777778            가입
## 243   913 1.0000000            가입
## 244   924 0.1111111          미가입
## 245   926 0.3333333            가입
## 246   927 0.8888889            가입
## 247   938 0.3333333            가입
## 248   939 0.2222222            가입
## 249   947 0.2222222          미가입
## 250   948 0.1111111          미가입
## 251   952 1.0000000            가입
## 252   959 0.2222222          미가입
## 253   965 0.6666667          미가입
## 254   968 0.1111111          미가입
## 255   973 1.0000000            가입
## 256   975 0.6666667            가입
## 257   978 0.2222222          미가입
## 258   981 0.2222222          미가입
## 259   982 0.2222222          미가입
## 260   983 0.6666667            가입
## 261   988 0.4444444          미가입
## 262   994 0.4444444          미가입
## 263   995 0.2222222          미가입
## 264   999 0.5555556            가입
## 265  1000 0.1111111          미가입
## 266  1003 0.1111111          미가입
## 267  1004 0.0000000          미가입
## 268  1005 0.2222222          미가입
## 269  1006 0.2222222          미가입
## 270  1011 0.4444444            가입
## 271  1013 0.1111111          미가입
## 272  1017 0.4444444          미가입
## 273  1019 0.5555556            가입
## 274  1022 0.1111111          미가입
## 275  1031 0.2222222          미가입
## 276  1037 0.2222222            가입
## 277  1044 0.5555556            가입
## 278  1046 0.3333333          미가입
## 279  1050 0.5555556          미가입
## 280  1053 0.1111111            가입
## 281  1055 1.0000000            가입
## 282  1057 0.3333333          미가입
## 283  1063 0.2222222          미가입
## 284  1067 0.1111111            가입
## 285  1079 0.0000000          미가입
## 286  1080 0.1111111          미가입
## 287  1082 0.2222222          미가입
## 288  1087 0.5555556          미가입
## 289  1090 0.1111111          미가입
## 290  1092 0.6666667            가입
## 291  1095 0.6666667            가입
## 292  1098 0.1111111          미가입
## 293  1106 0.1111111          미가입
## 294  1110 1.0000000          미가입
## 295  1111 0.2222222          미가입
## 296  1112 0.2222222          미가입
## 297  1135 0.1111111            가입
## 298  1140 0.7777778          미가입
## 299  1146 0.2222222          미가입
## 300  1147 1.0000000            가입
## 301  1150 0.3333333            가입
## 302  1151 0.7777778            가입
## 303  1153 0.2222222          미가입
## 304  1156 0.1111111            가입
## 305  1158 0.1111111          미가입
## 306  1162 0.3333333            가입
## 307  1169 0.7777778            가입
## 308  1172 0.3333333            가입
## 309  1173 0.5555556            가입
## 310  1175 0.1111111          미가입
## 311  1182 0.2222222          미가입
## 312  1186 0.2222222          미가입
## 313  1188 0.1111111          미가입
## 314  1190 0.1111111          미가입
## 315  1193 0.1111111          미가입
## 316  1195 0.0000000          미가입
## 317  1198 0.0000000          미가입
## 318  1202 1.0000000            가입
## 319  1206 0.1111111            가입
## 320  1207 0.2222222          미가입
## 321  1209 0.8888889            가입
## 322  1210 0.3333333            가입
## 323  1212 0.3333333          미가입
## 324  1213 0.3333333            가입
## 325  1219 0.3333333            가입
## 326  1230 0.6666667            가입
## 327  1235 0.3333333          미가입
## 328  1245 1.0000000            가입
## 329  1247 0.1111111          미가입
## 330  1257 0.4444444          미가입
## 331  1263 0.1111111          미가입
## 332  1264 0.2222222          미가입
## 333  1271 0.6666667            가입
## 334  1272 0.2222222            가입
## 335  1274 0.0000000          미가입
## 336  1275 0.1111111          미가입
## 337  1282 1.0000000            가입
## 338  1284 0.2222222          미가입
## 339  1297 0.1111111          미가입
## 340  1302 0.4444444            가입
## 341  1308 0.2222222          미가입
## 342  1309 1.0000000            가입
## 343  1319 0.1111111          미가입
## 344  1335 0.1111111          미가입
## 345  1337 0.2222222          미가입
## 346  1346 0.3333333          미가입
## 347  1349 1.0000000            가입
## 348  1352 0.0000000          미가입
## 349  1354 0.7777778            가입
## 350  1358 0.8888889            가입
## 351  1366 0.1111111          미가입
## 352  1369 0.5555556          미가입
## 353  1374 0.0000000          미가입
## 354  1379 0.5555556          미가입
## 355  1381 0.8888889            가입
## 356  1383 0.8888889            가입
## 357  1385 0.1111111          미가입
## 358  1389 0.2222222            가입
## 359  1391 1.0000000          미가입
## 360  1400 0.1111111          미가입
## 361  1401 0.0000000          미가입
## 362  1407 0.3333333          미가입
## 363  1414 0.3333333          미가입
## 364  1416 0.4444444          미가입
## 365  1418 0.3333333            가입
## 366  1420 0.2222222          미가입
## 367  1421 0.7777778            가입
## 368  1427 0.2222222            가입
## 369  1428 0.7000000            가입
## 370  1431 0.2222222            가입
## 371  1440 0.3333333          미가입
## 372  1449 0.1111111          미가입
## 373  1459 0.8888889            가입
## 374  1460 0.1111111          미가입
## 375  1462 0.4444444          미가입
## 376  1464 0.1111111          미가입
## 377  1466 0.1111111          미가입
## 378  1469 0.3333333          미가입
## 379  1472 0.1111111          미가입
## 380  1479 0.8888889            가입
## 381  1487 0.1111111          미가입
## 382  1491 1.0000000            가입
## 383  1493 0.5555556          미가입
## 384  1496 0.2222222          미가입
## 385  1501 0.3333333          미가입
## 386  1506 0.1111111          미가입
## 387  1510 0.1111111          미가입
## 388  1511 0.1111111          미가입
## 389  1516 0.3333333          미가입
## 390  1523 0.1111111          미가입
## 391  1527 0.4444444            가입
## 392  1537 0.2222222            가입
## 393  1538 0.1111111          미가입
## 394  1540 0.3333333          미가입
## 395  1541 0.3333333          미가입
## 396  1545 0.7777778            가입
## 397  1549 0.2222222          미가입
## 398  1555 0.7777778            가입
## 399  1556 1.0000000            가입
## 400  1559 0.0000000          미가입
## 401  1566 0.8888889            가입
## 402  1568 0.8888889          미가입
## 403  1572 0.5555556          미가입
## 404  1585 0.7777778            가입
## 405  1588 0.0000000          미가입
## 406  1589 0.8888889            가입
## 407  1607 0.1111111            가입
## 408  1615 0.3333333          미가입
## 409  1617 0.2222222          미가입
## 410  1618 0.0000000          미가입
## 411  1620 0.2222222          미가입
## 412  1630 0.0000000            가입
## 413  1640 0.0000000          미가입
## 414  1647 0.1111111          미가입
## 415  1648 0.2222222          미가입
## 416  1650 0.6666667            가입
## 417  1651 1.0000000            가입
## 418  1656 0.1111111          미가입
## 419  1658 0.6666667          미가입
## 420  1661 0.2222222          미가입
## 421  1662 0.1111111          미가입
## 422  1670 0.6666667            가입
## 423  1671 0.6666667            가입
## 424  1675 0.4444444            가입
## 425  1678 1.0000000            가입
## 426  1689 0.0000000          미가입
## 427  1691 1.0000000            가입
## 428  1692 0.2222222          미가입
## 429  1714 0.1111111          미가입
## 430  1722 0.7777778            가입
## 431  1727 0.2222222          미가입
## 432  1732 1.0000000          미가입
## 433  1734 0.1111111          미가입
## 434  1736 0.6666667            가입
## 435  1738 0.2222222          미가입
## 436  1742 0.2222222          미가입
## 437  1747 1.0000000            가입
## 438  1750 0.1111111          미가입
## 439  1754 0.6666667            가입
## 440  1756 0.2222222            가입
## 441  1764 0.4444444          미가입
## 442  1765 0.3333333            가입
## 443  1767 0.4444444            가입
## 444  1769 0.2222222          미가입
## 445  1770 0.1111111          미가입
## 446  1773 0.3333333          미가입
## 447  1774 0.1111111          미가입
## 448  1785 0.5555556          미가입
## 449  1791 0.1111111          미가입
## 450  1793 1.0000000            가입
## 451  1798 1.0000000            가입
## 452  1804 0.2222222          미가입
## 453  1806 0.0000000            가입
## 454  1809 0.2222222          미가입
## 455  1812 0.4444444          미가입
## 456  1813 1.0000000            가입
## 457  1818 0.1111111          미가입
## 458  1827 0.3333333            가입
## 459  1828 0.1111111          미가입
## 460  1831 0.0000000          미가입
## 461  1834 0.1111111          미가입
## 462  1835 0.0000000          미가입
## 463  1848 0.0000000          미가입
## 464  1858 0.1111111            가입
## 465  1863 0.4444444          미가입
## 466  1865 0.8888889            가입
## 467  1873 0.4444444          미가입
## 468  1882 0.7777778            가입
## 469  1886 0.3333333          미가입
## 470  1887 0.0000000          미가입
## 471  1893 0.3333333          미가입
## 472  1896 1.0000000            가입
## 473  1899 0.0000000          미가입
## 474  1901 0.2222222          미가입
## 475  1909 0.0000000          미가입
## 476  1912 0.4444444          미가입
## 477  1918 0.8888889            가입
## 478  1920 0.1111111          미가입
## 479  1921 0.0000000          미가입
## 480  1922 0.4444444          미가입
## 481  1930 0.2222222          미가입
## 482  1937 0.7777778          미가입
## 483  1938 0.6666667          미가입
## 484  1940 0.8888889          미가입
## 485  1942 0.0000000          미가입
## 486  1949 0.4444444            가입
## 487  1951 0.4444444          미가입
## 488  1953 0.4444444            가입
## 489  1957 0.1111111          미가입
## 490  1958 0.4444444          미가입
## 491  1959 0.3333333          미가입
## 492  1975 0.7777778            가입
## 493  1976 0.2222222          미가입
## 494  1980 0.1111111          미가입
## 495  1983 0.8888889          미가입
## 496  1986 0.4444444          미가입
# part4 3
# Load the insurance training and test sets, then inspect the training columns.
train <- read.csv("Insurance_train_10.csv")
test <- read.csv("Insurance_test_10.csv")
glimpse(train)
## Rows: 6,969
## Columns: 9
## $ Gender          <chr> "Male", "Female", "Male", "Male", "Male", "Female", "F…
## $ Ever_Married    <chr> "No", "Yes", "Yes", "Yes", "No", "No", "Yes", "Yes", "…
## $ Age             <int> 22, 67, 67, 56, 32, 33, 61, 55, 26, 19, 58, 41, 32, 31…
## $ Graduated       <chr> "No", "Yes", "Yes", "No", "Yes", "Yes", "Yes", "Yes", …
## $ Profession      <chr> "Healthcare", "Engineer", "Lawyer", "Artist", "Healthc…
## $ Work_Experience <int> 1, 1, 0, 0, 1, 1, 0, 1, 1, 4, 0, 1, 9, 1, 1, 0, 12, 3,…
## $ Spending_Score  <chr> "Low", "Low", "High", "Average", "Low", "Low", "Low", …
## $ Family_Size     <int> 4, 1, 2, 2, 3, 3, 3, 4, 3, 4, 1, 2, 5, 6, 4, 1, 1, 4, …
## $ Segmentation    <int> 4, 2, 2, 3, 3, 4, 4, 3, 1, 4, 2, 3, 4, 2, 2, 3, 1, 4, …
# Column types and leading values of the test set.
glimpse(x = test)
## Rows: 2,267
## Columns: 9
## $ X               <int> 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ Gender          <chr> "Female", "Male", "Female", "Male", "Male", "Male", "F…
## $ Ever_Married    <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"…
## $ Age             <int> 36, 37, 69, 59, 47, 61, 47, 50, 19, 22, 22, 50, 27, 18…
## $ Graduated       <chr> "Yes", "Yes", "No", "No", "Yes", "Yes", "Yes", "Yes", …
## $ Profession      <chr> "Engineer", "Healthcare", "", "Executive", "Doctor", "…
## $ Work_Experience <int> 0, 8, 0, 11, 0, 5, 1, 2, 0, 0, 0, 1, 8, 0, 0, 1, 1, 8,…
## $ Spending_Score  <chr> "Low", "Average", "Low", "High", "High", "Low", "Avera…
## $ Family_Size     <int> 1, 4, 1, 2, 5, 3, 3, 4, 4, 3, 6, 5, 3, 3, 1, 3, 2, 1, …
# Per-column NA counts for both sets. is.na() does not flag empty strings,
# such as the "" visible in the Profession column of the test glimpse above.
train %>%
  is.na() %>%
  colSums()
##          Gender    Ever_Married             Age       Graduated      Profession 
##               0               0               0               0               0 
## Work_Experience  Spending_Score     Family_Size    Segmentation 
##               0               0               0               0
test %>%
  is.na() %>%
  colSums()
##               X          Gender    Ever_Married             Age       Graduated 
##               0               0               0               0               0 
##      Profession Work_Experience  Spending_Score     Family_Size 
##               0               0               0               0
# Convert the integer label to a factor so the response is categorical
# (the recorded output below reports 4 classes).
train$Segmentation <- as.factor(train$Segmentation)

# Fix the RNG seed so the cross-validation folds, and therefore the reported
# accuracy and the selected k, are reproducible. The output recorded below
# came from an earlier unseeded run, so its numbers may differ slightly.
set.seed(123)

# k-nearest neighbours on centred and scaled predictors, tuned by
# cross-validation. The data frame `train` shares its name with caret's
# train(); R still resolves the call to the function.
model <- train(
  Segmentation ~ .,
  data = train,
  method = "knn",
  preProcess = c("center", "scale"),
  trControl = trainControl(method = "cv")
)
model
## k-Nearest Neighbors 
## 
## 6969 samples
##    8 predictor
##    4 classes: '1', '2', '3', '4' 
## 
## Pre-processing: centered (19), scaled (19) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 6272, 6273, 6274, 6272, 6272, 6272, ... 
## Resampling results across tuning parameters:
## 
##   k  Accuracy   Kappa    
##   5  0.4874497  0.3152952
##   7  0.4884457  0.3165998
##   9  0.4930420  0.3224582
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 9.
# Confusion matrix of the cross-validation resamples for the fitted model.
confusionMatrix(data = model)
## Cross-Validated (10 fold) Confusion Matrix 
## 
## (entries are percentual average cell counts across resamples)
##  
##           Reference
## Prediction    1    2    3    4
##          1  9.9  5.5  2.8  5.3
##          2  5.3  7.3  5.4  2.0
##          3  3.9  7.8 14.0  1.3
##          4  5.2  2.8  3.2 18.1
##                            
##  Accuracy (average) : 0.493
# Predict the segment of every test row. The former `prob=TRUE` argument is
# not a parameter of caret's predict method for train objects and had no
# effect: the recorded result is a factor of class labels, which is what the
# default call returns.
result <- predict(model, newdata = test)

# Attach the predictions under an explicit column name; binding the unnamed
# vector previously produced an auto-generated `...10` column.
df <- test %>% mutate(Segmentation = result)
glimpse(test)
## Rows: 2,267
## Columns: 9
## $ X               <int> 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ Gender          <chr> "Female", "Male", "Female", "Male", "Male", "Male", "F…
## $ Ever_Married    <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"…
## $ Age             <int> 36, 37, 69, 59, 47, 61, 47, 50, 19, 22, 22, 50, 27, 18…
## $ Graduated       <chr> "Yes", "Yes", "No", "No", "Yes", "Yes", "Yes", "Yes", …
## $ Profession      <chr> "Engineer", "Healthcare", "", "Executive", "Doctor", "…
## $ Work_Experience <int> 0, 8, 0, 11, 0, 5, 1, 2, 0, 0, 0, 1, 8, 0, 0, 1, 1, 8,…
## $ Spending_Score  <chr> "Low", "Average", "Low", "High", "High", "Low", "Avera…
## $ Family_Size     <int> 1, 4, 1, 2, 5, 3, 3, 4, 4, 3, 6, 5, 3, 3, 1, 3, 2, 1, …
glimpse(df)
## Rows: 2,267
## Columns: 10
## $ X               <int> 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ Gender          <chr> "Female", "Male", "Female", "Male", "Male", "Male", "F…
## $ Ever_Married    <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"…
## $ Age             <int> 36, 37, 69, 59, 47, 61, 47, 50, 19, 22, 22, 50, 27, 18…
## $ Graduated       <chr> "Yes", "Yes", "No", "No", "Yes", "Yes", "Yes", "Yes", …
## $ Profession      <chr> "Engineer", "Healthcare", "", "Executive", "Doctor", "…
## $ Work_Experience <int> 0, 8, 0, 11, 0, 5, 1, 2, 0, 0, 0, 1, 8, 0, 0, 1, 1, 8,…
## $ Spending_Score  <chr> "Low", "Average", "Low", "High", "High", "Low", "Avera…
## $ Family_Size     <int> 1, 4, 1, 2, 5, 3, 3, 4, 4, 3, 6, 5, 3, 3, 1, 3, 2, 1, …
## $ Segmentation    <fct> 2, 1, 2, 3, 3, 1, 3, 3, 4, 4, 4, 3, 4, 4, 2, 2, 3, 2, …
# part4 5
# Load the restaurant data and inspect its columns.
df <- read.csv("nyc.csv")
glimpse(df)
## Rows: 165
## Columns: 9
## $ Case       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
## $ Restaurant <chr> "Daniella Ristorante", "Tello's Ristorante", "Biricchino", …
## $ Price      <int> 43, 32, 34, 41, 54, 52, 34, 34, 39, 44, 45, 47, 52, 35, 47,…
## $ Food       <int> 22, 20, 21, 20, 24, 22, 22, 20, 22, 21, 19, 21, 21, 19, 20,…
## $ Decor      <int> 18, 19, 13, 20, 19, 22, 16, 18, 19, 17, 17, 19, 19, 17, 18,…
## $ Service    <int> 20, 19, 18, 17, 21, 21, 21, 21, 22, 19, 20, 21, 20, 19, 21,…
## $ East       <int> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ latitude   <dbl> 40.74683, 40.74342, 40.74886, 40.74848, 40.73958, 40.74069,…
## $ longitude  <dbl> -73.99676, -73.99954, -73.99552, -74.00331, -73.99591, -73.…
# Per-column descriptive statistics of the restaurant data.
summary(object = df)
##       Case        Restaurant            Price            Food      
##  Min.   :  1.0   Length:165         Min.   :19.00   Min.   :16.00  
##  1st Qu.: 43.0   Class :character   1st Qu.:36.00   1st Qu.:19.00  
##  Median : 84.0   Mode  :character   Median :43.00   Median :21.00  
##  Mean   : 84.5                      Mean   :42.67   Mean   :20.59  
##  3rd Qu.:127.0                      3rd Qu.:50.00   3rd Qu.:22.00  
##  Max.   :168.0                      Max.   :65.00   Max.   :25.00  
##      Decor          Service           East           latitude    
##  Min.   : 6.00   Min.   :14.00   Min.   :0.0000   Min.   :40.71  
##  1st Qu.:16.00   1st Qu.:18.00   1st Qu.:0.0000   1st Qu.:40.76  
##  Median :18.00   Median :20.00   Median :1.0000   Median :40.76  
##  Mean   :17.68   Mean   :19.39   Mean   :0.6303   Mean   :40.76  
##  3rd Qu.:19.00   3rd Qu.:21.00   3rd Qu.:1.0000   3rd Qu.:40.77  
##  Max.   :25.00   Max.   :24.00   Max.   :1.0000   Max.   :40.80  
##    longitude     
##  Min.   :-74.01  
##  1st Qu.:-73.98  
##  Median :-73.97  
##  Mean   :-73.97  
##  3rd Qu.:-73.96  
##  Max.   :-73.93
# Fix the RNG seed so the 70/30 split and the cross-validation folds are
# reproducible; without it every run produces a different partition and
# different metrics. The output recorded below came from an unseeded run.
set.seed(123)

# Row indices of the training partition (70% of the rows).
train_list <- createDataPartition(df$Price, p = 0.7, list = FALSE)
train <- df[train_list, ]
test <- df[-train_list, ]

# Drop the identifier columns from both sets, and the response from the
# hold-out set so that it contains predictors only.
test <- test %>% select(-Price, -Restaurant, -Case)
train <- train %>% select(-Restaurant, -Case)

# Random forest regression of Price on all remaining columns, tuned by
# cross-validation. The data frame `train` shares its name with caret's
# train(); R still resolves the call to the function.
model <- train(
  Price ~ .,
  data = train,
  method = "rf",
  trControl = trainControl(method = "cv")
)
model
## Random Forest 
## 
## 118 samples
##   6 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 107, 105, 105, 105, 107, 107, ... 
## Resampling results across tuning parameters:
## 
##   mtry  RMSE      Rsquared   MAE     
##   2     6.254265  0.5847555  4.897332
##   4     6.267707  0.5852535  4.931151
##   6     6.339397  0.5826628  4.946240
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 2.
# Predicted Price for the hold-out rows; show the first few values.
result <- predict(model, newdata = test)
head(result)
##        5        7        8        9       10       12 
## 49.40921 43.50968 44.39563 49.24984 39.01449 44.16476
# part4 2.3
# Load the wbc data and inspect its columns.
df <- read.csv("wbc.csv")
glimpse(df)
## Rows: 569
## Columns: 33
## $ id                      <int> 842302, 842517, 84300903, 84348301, 84358402, …
## $ diagnosis               <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "…
## $ radius_mean             <dbl> 17.990, 20.570, 19.690, 11.420, 20.290, 12.450…
## $ texture_mean            <dbl> 10.38, 17.77, 21.25, 20.38, 14.34, 15.70, 19.9…
## $ perimeter_mean          <dbl> 122.80, 132.90, 130.00, 77.58, 135.10, 82.57, …
## $ area_mean               <dbl> 1001.0, 1326.0, 1203.0, 386.1, 1297.0, 477.1, …
## $ smoothness_mean         <dbl> 0.11840, 0.08474, 0.10960, 0.14250, 0.10030, 0…
## $ compactness_mean        <dbl> 0.27760, 0.07864, 0.15990, 0.28390, 0.13280, 0…
## $ concavity_mean          <dbl> 0.30010, 0.08690, 0.19740, 0.24140, 0.19800, 0…
## $ concave.points_mean     <dbl> 0.14710, 0.07017, 0.12790, 0.10520, 0.10430, 0…
## $ symmetry_mean           <dbl> 0.2419, 0.1812, 0.2069, 0.2597, 0.1809, 0.2087…
## $ fractal_dimension_mean  <dbl> 0.07871, 0.05667, 0.05999, 0.09744, 0.05883, 0…
## $ radius_se               <dbl> 1.0950, 0.5435, 0.7456, 0.4956, 0.7572, 0.3345…
## $ texture_se              <dbl> 0.9053, 0.7339, 0.7869, 1.1560, 0.7813, 0.8902…
## $ perimeter_se            <dbl> 8.589, 3.398, 4.585, 3.445, 5.438, 2.217, 3.18…
## $ area_se                 <dbl> 153.40, 74.08, 94.03, 27.23, 94.44, 27.19, 53.…
## $ smoothness_se           <dbl> 0.006399, 0.005225, 0.006150, 0.009110, 0.0114…
## $ compactness_se          <dbl> 0.049040, 0.013080, 0.040060, 0.074580, 0.0246…
## $ concavity_se            <dbl> 0.05373, 0.01860, 0.03832, 0.05661, 0.05688, 0…
## $ concave.points_se       <dbl> 0.015870, 0.013400, 0.020580, 0.018670, 0.0188…
## $ symmetry_se             <dbl> 0.03003, 0.01389, 0.02250, 0.05963, 0.01756, 0…
## $ fractal_dimension_se    <dbl> 0.006193, 0.003532, 0.004571, 0.009208, 0.0051…
## $ radius_worst            <dbl> 25.38, 24.99, 23.57, 14.91, 22.54, 15.47, 22.8…
## $ texture_worst           <dbl> 17.33, 23.41, 25.53, 26.50, 16.67, 23.75, 27.6…
## $ perimeter_worst         <dbl> 184.60, 158.80, 152.50, 98.87, 152.20, 103.40,…
## $ area_worst              <dbl> 2019.0, 1956.0, 1709.0, 567.7, 1575.0, 741.6, …
## $ smoothness_worst        <dbl> 0.1622, 0.1238, 0.1444, 0.2098, 0.1374, 0.1791…
## $ compactness_worst       <dbl> 0.6656, 0.1866, 0.4245, 0.8663, 0.2050, 0.5249…
## $ concavity_worst         <dbl> 0.71190, 0.24160, 0.45040, 0.68690, 0.40000, 0…
## $ concave.points_worst    <dbl> 0.26540, 0.18600, 0.24300, 0.25750, 0.16250, 0…
## $ symmetry_worst          <dbl> 0.4601, 0.2750, 0.3613, 0.6638, 0.2364, 0.3985…
## $ fractal_dimension_worst <dbl> 0.11890, 0.08902, 0.08758, 0.17300, 0.07678, 0…
## $ X                       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
# Per-column NA counts.
df %>%
  is.na() %>%
  colSums()
##                      id               diagnosis             radius_mean 
##                       0                       0                       0 
##            texture_mean          perimeter_mean               area_mean 
##                       0                       0                       0 
##         smoothness_mean        compactness_mean          concavity_mean 
##                       0                       0                       0 
##     concave.points_mean           symmetry_mean  fractal_dimension_mean 
##                       0                       0                       0 
##               radius_se              texture_se            perimeter_se 
##                       0                       0                       0 
##                 area_se           smoothness_se          compactness_se 
##                       0                       0                       0 
##            concavity_se       concave.points_se             symmetry_se 
##                       0                       0                       0 
##    fractal_dimension_se            radius_worst           texture_worst 
##                       0                       0                       0 
##         perimeter_worst              area_worst        smoothness_worst 
##                       0                       0                       0 
##       compactness_worst         concavity_worst    concave.points_worst 
##                       0                       0                       0 
##          symmetry_worst fractal_dimension_worst                       X 
##                       0                       0                     569
# Drop the record id and the column X, which the NA count above shows to be
# missing in all 569 rows.
df <- df %>% select(-id, -X)

# Fix the RNG seed so the 80/20 split and the cross-validation folds are
# reproducible; without it every run produces a different partition and
# different metrics. The output recorded below came from an unseeded run.
set.seed(123)

# Row indices of the training partition (80% of the rows).
train_list <- createDataPartition(df$diagnosis, p = 0.8, list = FALSE)
train_df <- df[train_list, ]
test_df <- df[-train_list, ]
# The hold-out set keeps predictors only.
test_df <- test_df %>% select(-diagnosis)

# k-nearest neighbours on centred and scaled predictors, tuned by
# cross-validation.
model <- train(
  diagnosis ~ .,
  data = train_df,
  method = "knn",
  preProcess = c("center", "scale"),
  trControl = trainControl(method = "cv")
)
model
## k-Nearest Neighbors 
## 
## 456 samples
##  30 predictor
##   2 classes: 'B', 'M' 
## 
## Pre-processing: centered (30), scaled (30) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 410, 410, 411, 411, 411, 410, ... 
## Resampling results across tuning parameters:
## 
##   k  Accuracy   Kappa    
##   5  0.9692754  0.9331713
##   7  0.9648792  0.9230639
##   9  0.9648792  0.9240188
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 5.
# Class probabilities (columns B and M) for the hold-out rows; show the first few.
result <- predict(model, newdata = test_df, type = "prob")
head(result)
##     B   M
## 1 0.2 0.8
## 2 0.0 1.0
## 3 0.2 0.8
## 4 1.0 0.0
## 5 0.0 1.0
## 6 0.0 1.0