# Read the raw campaign export. Fields are separated by ";" and quoting is
# switched off (quote = ""), so the double-quote characters present in the
# file remain part of the parsed values and column names.
# NOTE(review): in the na.strings vector, NULL is dropped by c() and NA / NaN
# are coerced to character -- confirm this is the intended set of NA markers.
load_data <- read.csv(
  file = "C:/Users/Lakshman Y/Desktop/Gremener/Campaign_Data.csv",
  sep = ";",
  quote = "",
  header = TRUE,
  strip.white = TRUE,
  na.strings = c("", ".", NA, NULL, NaN)
)
# Print the structure of the freshly loaded data frame.
str(load_data)
## 'data.frame': 41188 obs. of 21 variables:
## $ X.age : Factor w/ 78 levels "\"17","\"18",..: 40 41 21 24 40 29 43 25 8 9 ...
## $ X..job.. : Factor w/ 12 levels "\"\"admin.\"\"",..: 4 8 8 1 8 8 1 2 10 8 ...
## $ X..marital.. : Factor w/ 4 levels "\"\"divorced\"\"",..: 2 2 2 2 2 2 2 2 3 3 ...
## $ X..education.. : Factor w/ 8 levels "\"\"basic.4y\"\"",..: 1 4 4 2 4 3 6 8 6 4 ...
## $ X..default.. : Factor w/ 3 levels "\"\"no\"\"","\"\"unknown\"\"",..: 1 2 1 1 1 2 1 2 1 1 ...
## $ X..housing.. : Factor w/ 3 levels "\"\"no\"\"","\"\"unknown\"\"",..: 1 1 3 1 1 1 1 1 3 3 ...
## $ X..loan.. : Factor w/ 3 levels "\"\"no\"\"","\"\"unknown\"\"",..: 1 1 1 1 3 1 1 1 1 1 ...
## $ X..contact.. : Factor w/ 2 levels "\"\"cellular\"\"",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ X..month.. : Factor w/ 10 levels "\"\"apr\"\"",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ X..day_of_week.. : Factor w/ 5 levels "\"\"fri\"\"",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ X..duration.. : int 261 149 226 151 307 198 139 217 380 50 ...
## $ X..campaign.. : int 1 1 1 1 1 1 1 1 1 1 ...
## $ X..pdays.. : int 999 999 999 999 999 999 999 999 999 999 ...
## $ X..previous.. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ X..poutcome.. : Factor w/ 3 levels "\"\"failure\"\"",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ X..emp.var.rate.. : num 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 ...
## $ X..cons.price.idx..: num 94 94 94 94 94 ...
## $ X..cons.conf.idx.. : num -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 ...
## $ X..euribor3m.. : num 4.86 4.86 4.86 4.86 4.86 ...
## $ X..nr.employed.. : num 5191 5191 5191 5191 5191 ...
## $ X..response... : Factor w/ 2 levels "\"\"no\"\"\"",..: 1 1 1 1 1 1 1 1 1 1 ...
#View(load_data)
# Strip the stray double-quote characters from every value of load_data.
# gsub() coerces each column to text, so every column comes back as text.
# lapply() keeps one list element per column (sapply() would first collapse
# the data frame into a character matrix), and stringsAsFactors = TRUE is set
# explicitly so the columns become factors regardless of the R version's
# default, matching the str() output recorded below.
clean_data <- as.data.frame(
  lapply(load_data, function(x) gsub("\"", "", x, fixed = TRUE)),
  stringsAsFactors = TRUE
)
# Tidy the column names: remove every dot, then the leading "X" only
# (anchored, so an "X" elsewhere in a name would be left alone).
names(clean_data) <- gsub("\\.", "", names(clean_data))
names(clean_data) <- gsub("^X", "", names(clean_data))
str(clean_data)
## 'data.frame': 41188 obs. of 21 variables:
## $ age : Factor w/ 78 levels "17","18","19",..: 40 41 21 24 40 29 43 25 8 9 ...
## $ job : Factor w/ 12 levels "admin.","blue-collar",..: 4 8 8 1 8 8 1 2 10 8 ...
## $ marital : Factor w/ 4 levels "divorced","married",..: 2 2 2 2 2 2 2 2 3 3 ...
## $ education : Factor w/ 8 levels "basic.4y","basic.6y",..: 1 4 4 2 4 3 6 8 6 4 ...
## $ default : Factor w/ 3 levels "no","unknown",..: 1 2 1 1 1 2 1 2 1 1 ...
## $ housing : Factor w/ 3 levels "no","unknown",..: 1 1 3 1 1 1 1 1 3 3 ...
## $ loan : Factor w/ 3 levels "no","unknown",..: 1 1 1 1 3 1 1 1 1 1 ...
## $ contact : Factor w/ 2 levels "cellular","telephone": 2 2 2 2 2 2 2 2 2 2 ...
## $ month : Factor w/ 10 levels "apr","aug","dec",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ day_of_week : Factor w/ 5 levels "fri","mon","thu",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ duration : Factor w/ 1544 levels "0","1","10","100",..: 708 419 657 433 767 599 356 641 861 996 ...
## $ campaign : Factor w/ 42 levels "1","10","11",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ pdays : Factor w/ 27 levels "0","1","10","11",..: 27 27 27 27 27 27 27 27 27 27 ...
## $ previous : Factor w/ 8 levels "0","1","2","3",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ poutcome : Factor w/ 3 levels "failure","nonexistent",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ empvarrate : Factor w/ 10 levels "-0.1","-0.2",..: 9 9 9 9 9 9 9 9 9 9 ...
## $ conspriceidx: Factor w/ 26 levels "92.201","92.379",..: 19 19 19 19 19 19 19 19 19 19 ...
## $ consconfidx : Factor w/ 26 levels "-26.9","-29.8",..: 10 10 10 10 10 10 10 10 10 10 ...
## $ euribor3m : Factor w/ 316 levels "0.634","0.635",..: 288 288 288 288 288 288 288 288 288 288 ...
## $ nremployed : Factor w/ 11 levels "4963.6","4991.6",..: 9 9 9 9 9 9 9 9 9 9 ...
## $ response : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
#View(clean_data)
#clean_data1 = clean_data
# Columns that hold numbers but are stored as text/factors after the quote
# stripping; these are the columns at positions 1, 11:14 and 16:20.
numeric_cols <- c(
  "age", "duration", "campaign", "pdays", "previous",
  "empvarrate", "conspriceidx", "consconfidx", "euribor3m", "nremployed"
)
# Convert column by column. Going through as.character() first makes a factor
# yield its labels rather than its internal integer codes.
clean_data[numeric_cols] <- lapply(
  clean_data[numeric_cols],
  function(x) as.numeric(as.character(x))
)
#str(clean_data)
# NO missing values
# Count the NA entries in each column.
colSums(is.na(clean_data))
## age job marital education default
## 0 0 0 0 0
## housing loan contact month day_of_week
## 0 0 0 0 0
## duration campaign pdays previous poutcome
## 0 0 0 0 0
## empvarrate conspriceidx consconfidx euribor3m nremployed
## 0 0 0 0 0
## response
## 0
#write.csv(clean_data, "mycampaign_data.csv")
library(ggplot2)
# Box plots of call duration per month, coloured by campaign response.
ggplot(
  data = clean_data,
  mapping = aes(x = month, y = duration, col = response)
) +
  geom_boxplot() +
  ggtitle("No response is independent of month however YES is MAY and JULY")
# Individual call durations plotted against job category.
ggplot(data = clean_data, mapping = aes(x = job, y = duration)) +
  geom_point()
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Overall mean of the duration column.
mean(clean_data[["duration"]])
## [1] 258.285
# Plot the mean duration for every combination of one grouping column and the
# response column.
#
# df: data frame containing `response`, `duration` and the column selected
#     by `i`.
# i:  name of the grouping column; it is also used as the x-axis label.
#
# Returns the ggplot object (points coloured by response).
# Relies on dplyr and ggplot2 being attached by the calling script.
Funplotdata_duration <- function(df, i) {
  # Copy the three columns that are needed under fixed names, so the grouping
  # and the plot refer to columns by name instead of by `df[, i]` expressions
  # and positional indices. `[[` also works when df is a tibble.
  plotdata <- data.frame(
    group_value = df[[i]],
    response = df[["response"]],
    duration = df[["duration"]],
    stringsAsFactors = FALSE
  )
  plotdata <- plotdata %>%
    group_by(group_value, response) %>%
    summarize(mean_duration = mean(duration))
  # Drop the grouped-tibble classes before plotting.
  plotdata <- data.frame(plotdata)
  ggplot(plotdata, aes(x = group_value, y = mean_duration, col = response)) +
    geom_point() +
    xlab(i) +
    ylab("duration") +
    labs(col = "Response") +
    ggtitle("Duration and Response with variables")
}
# Mean duration by response, once for each grouping column of interest.
Funplotdata_duration(df = clean_data, i = "age")
Funplotdata_duration(df = clean_data, i = "job")
Funplotdata_duration(df = clean_data, i = "marital")
Funplotdata_duration(df = clean_data, i = "education")
Funplotdata_duration(df = clean_data, i = "default")
Funplotdata_duration(df = clean_data, i = "loan")
# Plot the mean of the campaign column for every combination of one grouping
# column and the response column.
#
# df: data frame containing `response`, `campaign` and the column selected
#     by `i`.
# i:  name of the grouping column; it is also used as the x-axis label.
#
# Returns the ggplot object (points coloured by response).
# Relies on dplyr and ggplot2 being attached by the calling script, so no
# library() call is made inside the function.
Funplotdata_campaign <- function(df, i) {
  # Copy the three columns that are needed under fixed names, so the grouping
  # and the plot refer to columns by name instead of by `df[, i]` expressions
  # and positional indices. `[[` also works when df is a tibble.
  plotdata <- data.frame(
    group_value = df[[i]],
    response = df[["response"]],
    campaign = df[["campaign"]],
    stringsAsFactors = FALSE
  )
  plotdata <- plotdata %>%
    group_by(group_value, response) %>%
    summarize(mean_campaign = mean(campaign))
  # Drop the grouped-tibble classes before plotting.
  plotdata <- data.frame(plotdata)
  ggplot(plotdata, aes(x = group_value, y = mean_campaign, col = response)) +
    geom_point() +
    xlab(i) +
    ylab("campaign") +
    labs(col = "Response") +
    theme(axis.text = element_text(size = 7)) +
    ggtitle("Campaign and Response with variables")
}
# Mean campaign value by response, once for each grouping column of interest.
Funplotdata_campaign(df = clean_data, i = "age")
Funplotdata_campaign(df = clean_data, i = "job")
Funplotdata_campaign(df = clean_data, i = "marital")
Funplotdata_campaign(df = clean_data, i = "education")
# Plot the mean duration for every combination of one grouping column and the
# poutcome column.
#
# df: data frame containing `poutcome`, `duration` and the column selected
#     by `i`.
# i:  name of the grouping column; it is also used as the x-axis label.
#
# Returns the ggplot object (points coloured by poutcome).
# Relies on dplyr and ggplot2 being attached by the calling script, so no
# library() call is made inside the function.
Funplotdata_poutcome <- function(df, i) {
  # Copy the three columns that are needed under fixed names, so the grouping
  # and the plot refer to columns by name instead of by `df[, i]` expressions
  # and positional indices. `[[` also works when df is a tibble.
  plotdata <- data.frame(
    group_value = df[[i]],
    poutcome = df[["poutcome"]],
    duration = df[["duration"]],
    stringsAsFactors = FALSE
  )
  plotdata <- plotdata %>%
    group_by(group_value, poutcome) %>%
    summarize(mean_duration = mean(duration))
  # Drop the grouped-tibble classes before plotting.
  plotdata <- data.frame(plotdata)
  ggplot(plotdata, aes(x = group_value, y = mean_duration, col = poutcome)) +
    geom_point() +
    xlab(i) +
    ylab("duration") +
    labs(col = "poutcome") +
    theme(axis.text = element_text(size = 7)) +
    ggtitle("Poutcome and Duration with variables")
}
# Mean duration by poutcome, once for each grouping column of interest.
Funplotdata_poutcome(df = clean_data, i = "age")
Funplotdata_poutcome(df = clean_data, i = "marital")
Funplotdata_poutcome(df = clean_data, i = "education")
Funplotdata_poutcome(df = clean_data, i = "loan")
Funplotdata_poutcome(df = clean_data, i = "default")
Funplotdata_poutcome(df = clean_data, i = "housing")
Funplotdata_poutcome(df = clean_data, i = "job")
# Mean duration for each job / education / marital / response combination.
# The summary column is deliberately left unnamed so that data.frame() prints
# it under the derived name shown in the output below.
p <- clean_data %>%
  group_by(job, education, marital, response) %>%
  summarise(mean(duration))
# Convert to a plain data frame and print it.
data.frame(p)
## job education marital response mean.duration.
## 1 admin. basic.4y divorced no 117.50000
## 2 admin. basic.4y divorced yes 469.00000
## 3 admin. basic.4y married no 199.38462
## 4 admin. basic.4y married yes 824.50000
## 5 admin. basic.4y single no 330.15385
## 6 admin. basic.6y divorced no 243.62500
## 7 admin. basic.6y married no 215.13131
## 8 admin. basic.6y married yes 864.85714
## 9 admin. basic.6y single no 189.67857
## 10 admin. basic.6y single yes 470.00000
## 11 admin. basic.9y divorced no 208.12500
## 12 admin. basic.9y divorced yes 1028.50000
## 13 admin. basic.9y married no 217.30798
## 14 admin. basic.9y married yes 610.30435
## 15 admin. basic.9y single no 216.56557
## 16 admin. basic.9y single yes 548.66667
## 17 admin. high.school divorced no 229.81818
## 18 admin. high.school divorced yes 594.90698
## 19 admin. high.school married no 219.84715
## 20 admin. high.school married yes 543.60000
## 21 admin. high.school single no 215.50000
## 22 admin. high.school single yes 548.75610
## 23 admin. high.school unknown no 182.00000
## 24 admin. illiterate married no 151.00000
## 25 admin. professional.course divorced no 225.37209
## 26 admin. professional.course divorced yes 385.66667
## 27 admin. professional.course married no 235.27222
## 28 admin. professional.course married yes 515.96296
## 29 admin. professional.course single no 208.46667
## 30 admin. professional.course single yes 644.68750
## 31 admin. professional.course unknown no 125.00000
## 32 admin. university.degree divorced no 206.57886
## 33 admin. university.degree divorced yes 590.13333
## 34 admin. university.degree married no 211.07305
## 35 admin. university.degree married yes 504.95596
## 36 admin. university.degree single no 211.81442
## 37 admin. university.degree single yes 452.58333
## 38 admin. university.degree unknown no 238.50000
## 39 admin. university.degree unknown yes 1030.50000
## 40 admin. unknown divorced no 180.73913
## 41 admin. unknown divorced yes 440.50000
## 42 admin. unknown married no 232.11570
## 43 admin. unknown married yes 475.26923
## 44 admin. unknown single no 246.76119
## 45 admin. unknown single yes 443.60000
## 46 blue-collar basic.4y divorced no 235.25281
## 47 blue-collar basic.4y divorced yes 638.86667
## 48 blue-collar basic.4y married no 234.22638
## 49 blue-collar basic.4y married yes 799.96875
## 50 blue-collar basic.4y single no 233.09319
## 51 blue-collar basic.4y single yes 828.36364
## 52 blue-collar basic.4y unknown no 516.00000
## 53 blue-collar basic.4y unknown yes 1180.00000
## 54 blue-collar basic.6y divorced no 268.06494
## 55 blue-collar basic.6y divorced yes 879.60000
## 56 blue-collar basic.6y married no 218.58578
## 57 blue-collar basic.6y married yes 776.09877
## 58 blue-collar basic.6y single no 225.74457
## 59 blue-collar basic.6y single yes 880.09524
## 60 blue-collar basic.6y unknown no 355.00000
## 61 blue-collar basic.9y divorced no 240.99630
## 62 blue-collar basic.9y divorced yes 914.31250
## 63 blue-collar basic.9y married no 231.12273
## 64 blue-collar basic.9y married yes 724.91358
## 65 blue-collar basic.9y single no 228.55716
## 66 blue-collar basic.9y single yes 677.27869
## 67 blue-collar basic.9y unknown no 130.50000
## 68 blue-collar basic.9y unknown yes 314.00000
## 69 blue-collar high.school divorced no 229.69355
## 70 blue-collar high.school divorced yes 557.55556
## 71 blue-collar high.school married no 204.68884
## 72 blue-collar high.school married yes 788.63889
## 73 blue-collar high.school single no 231.64883
## 74 blue-collar high.school single yes 638.97917
## 75 blue-collar high.school unknown no 242.00000
## 76 blue-collar high.school unknown yes 192.00000
## 77 blue-collar illiterate married no 345.71429
## 78 blue-collar illiterate single no 259.00000
## 79 blue-collar professional.course divorced no 146.62069
## 80 blue-collar professional.course divorced yes 469.33333
## 81 blue-collar professional.course married no 236.73701
## 82 blue-collar professional.course married yes 639.00000
## 83 blue-collar professional.course single no 268.02667
## 84 blue-collar professional.course single yes 412.50000
## 85 blue-collar university.degree divorced no 73.00000
## 86 blue-collar university.degree married no 234.58621
## 87 blue-collar university.degree married yes 389.50000
## 88 blue-collar university.degree single no 203.81481
## 89 blue-collar university.degree single yes 563.85714
## 90 blue-collar unknown divorced no 251.12281
## 91 blue-collar unknown divorced yes 697.50000
## 92 blue-collar unknown married no 232.43103
## 93 blue-collar unknown married yes 871.41176
## 94 blue-collar unknown single no 241.04938
## 95 blue-collar unknown single yes 552.20000
## 96 blue-collar unknown unknown no 166.00000
## 97 entrepreneur basic.4y divorced no 321.00000
## 98 entrepreneur basic.4y married no 257.05932
## 99 entrepreneur basic.4y married yes 684.00000
## 100 entrepreneur basic.4y single no 159.20000
## 101 entrepreneur basic.4y single yes 185.00000
## 102 entrepreneur basic.6y divorced no 189.00000
## 103 entrepreneur basic.6y married no 206.01818
## 104 entrepreneur basic.6y married yes 603.00000
## 105 entrepreneur basic.6y single no 32.33333
## 106 entrepreneur basic.6y single yes 1053.00000
## 107 entrepreneur basic.9y divorced no 193.93750
## 108 entrepreneur basic.9y divorced yes 518.50000
## 109 entrepreneur basic.9y married no 218.09494
## 110 entrepreneur basic.9y married yes 857.12500
## 111 entrepreneur basic.9y single no 219.00000
## 112 entrepreneur basic.9y single yes 822.00000
## 113 entrepreneur high.school divorced no 208.76316
## 114 entrepreneur high.school divorced yes 843.40000
## 115 entrepreneur high.school married no 260.39735
## 116 entrepreneur high.school married yes 578.00000
## 117 entrepreneur high.school single no 214.57143
## 118 entrepreneur high.school single yes 955.50000
## 119 entrepreneur high.school unknown no 825.00000
## 120 entrepreneur illiterate married no 87.00000
## 121 entrepreneur illiterate married yes 838.00000
## 122 entrepreneur professional.course divorced no 163.50000
## 123 entrepreneur professional.course divorced yes 950.00000
## 124 entrepreneur professional.course married no 227.32258
## 125 entrepreneur professional.course married yes 804.57143
## 126 entrepreneur professional.course single no 200.26316
## 127 entrepreneur professional.course single yes 373.00000
## 128 entrepreneur university.degree divorced no 192.57143
## 129 entrepreneur university.degree divorced yes 434.60000
## 130 entrepreneur university.degree married no 225.26703
## 131 entrepreneur university.degree married yes 647.34043
## 132 entrepreneur university.degree single no 219.61616
## 133 entrepreneur university.degree single yes 642.92308
## 134 entrepreneur university.degree unknown no 157.00000
## 135 entrepreneur university.degree unknown yes 164.00000
## 136 entrepreneur unknown divorced no 211.44444
## 137 entrepreneur unknown divorced yes 536.00000
## 138 entrepreneur unknown married no 216.20000
## 139 entrepreneur unknown married yes 875.00000
## 140 entrepreneur unknown single no 201.50000
## 141 entrepreneur unknown single yes 542.00000
## 142 housemaid basic.4y divorced no 232.59016
## 143 housemaid basic.4y divorced yes 349.28571
## 144 housemaid basic.4y married no 215.91228
## 145 housemaid basic.4y married yes 478.92683
## 146 housemaid basic.4y single no 232.47368
## 147 housemaid basic.4y single yes 393.66667
## 148 housemaid basic.4y unknown no 198.00000
## 149 housemaid basic.6y divorced no 181.50000
## 150 housemaid basic.6y married no 216.87719
## 151 housemaid basic.6y married yes 1143.00000
## 152 housemaid basic.6y single no 94.00000
## 153 housemaid basic.9y divorced no 135.00000
## 154 housemaid basic.9y divorced yes 926.00000
## 155 housemaid basic.9y married no 202.96970
## 156 housemaid basic.9y married yes 897.50000
## 157 housemaid basic.9y single no 295.60000
## 158 housemaid high.school divorced no 247.00000
## 159 housemaid high.school divorced yes 1001.00000
## 160 housemaid high.school married no 212.57798
## 161 housemaid high.school married yes 450.12500
## 162 housemaid high.school single no 341.69231
## 163 housemaid high.school single yes 648.00000
## 164 housemaid illiterate married no 176.00000
## 165 housemaid professional.course divorced no 214.50000
## 166 housemaid professional.course divorced yes 523.66667
## 167 housemaid professional.course married no 288.34483
## 168 housemaid professional.course married yes 659.40000
## 169 housemaid professional.course single no 138.63636
## 170 housemaid professional.course single yes 225.66667
## 171 housemaid university.degree divorced no 294.46154
## 172 housemaid university.degree divorced yes 643.00000
## 173 housemaid university.degree married no 226.50000
## 174 housemaid university.degree married yes 626.44444
## 175 housemaid university.degree single no 160.61290
## 176 housemaid university.degree single yes 338.40000
## 177 housemaid university.degree unknown no 78.50000
## 178 housemaid unknown divorced no 142.11111
## 179 housemaid unknown married no 165.21739
## 180 housemaid unknown married yes 332.25000
## 181 housemaid unknown single no 286.00000
## 182 housemaid unknown single yes 741.00000
## 183 management basic.4y divorced no 215.33333
## 184 management basic.4y married no 253.98750
## 185 management basic.4y married yes 503.20000
## 186 management basic.4y single no 225.44444
## 187 management basic.6y divorced no 206.40000
## 188 management basic.6y married no 236.15625
## 189 management basic.6y married yes 495.90000
## 190 management basic.6y single no 218.00000
## 191 management basic.9y divorced no 141.55556
## 192 management basic.9y married no 225.09649
## 193 management basic.9y married yes 715.44444
## 194 management basic.9y single no 269.43750
## 195 management basic.9y single yes 552.50000
## 196 management high.school divorced no 258.26471
## 197 management high.school divorced yes 557.00000
## 198 management high.school married no 228.22167
## 199 management high.school married yes 642.69231
## 200 management high.school single no 239.72093
## 201 management high.school single yes 728.00000
## 202 management high.school unknown no 95.00000
## 203 management professional.course divorced no 96.58333
## 204 management professional.course divorced yes 503.50000
## 205 management professional.course married no 250.75862
## 206 management professional.course married yes 692.75000
## 207 management professional.course single no 157.09091
## 208 management professional.course single yes 345.00000
## 209 management university.degree divorced no 218.20755
## 210 management university.degree divorced yes 510.87500
## 211 management university.degree married no 218.96672
## 212 management university.degree married yes 524.98817
## 213 management university.degree single no 204.26970
## 214 management university.degree single yes 553.05357
## 215 management university.degree unknown no 191.00000
## 216 management unknown divorced no 182.66667
## 217 management unknown divorced yes 436.33333
## 218 management unknown married no 223.35366
## 219 management unknown married yes 618.31250
## 220 management unknown single no 282.25000
## 221 management unknown single yes 712.00000
## 222 retired basic.4y divorced no 197.76238
## 223 retired basic.4y divorced yes 356.34000
## 224 retired basic.4y married no 224.59933
## 225 retired basic.4y married yes 406.06870
## 226 retired basic.4y single no 230.16667
## 227 retired basic.4y single yes 307.50000
## 228 retired basic.4y unknown no 422.00000
## 229 retired basic.6y divorced no 255.00000
## 230 retired basic.6y divorced yes 218.33333
## 231 retired basic.6y married no 231.25000
## 232 retired basic.6y married yes 257.57143
## 233 retired basic.6y single no 231.00000
## 234 retired basic.9y divorced no 239.93750
## 235 retired basic.9y divorced yes 285.66667
## 236 retired basic.9y married no 212.58252
## 237 retired basic.9y married yes 676.35714
## 238 retired basic.9y single no 108.66667
## 239 retired basic.9y single yes 955.00000
## 240 retired basic.9y unknown no 340.00000
## 241 retired basic.9y unknown yes 810.00000
## 242 retired high.school divorced no 226.71795
## 243 retired high.school divorced yes 329.75000
## 244 retired high.school married no 241.82581
## 245 retired high.school married yes 532.73469
## 246 retired high.school single no 350.15000
## 247 retired high.school single yes 2035.00000
## 248 retired illiterate divorced no 146.00000
## 249 retired illiterate divorced yes 128.00000
## 250 retired illiterate married yes 125.00000
## 251 retired professional.course divorced no 227.29545
## 252 retired professional.course divorced yes 459.71429
## 253 retired professional.course married no 208.61240
## 254 retired professional.course married yes 387.70833
## 255 retired professional.course single no 303.36364
## 256 retired professional.course single yes 789.50000
## 257 retired university.degree divorced no 252.02083
## 258 retired university.degree divorced yes 425.88889
## 259 retired university.degree married no 231.25503
## 260 retired university.degree married yes 426.75926
## 261 retired university.degree single no 208.09524
## 262 retired university.degree single yes 186.00000
## 263 retired university.degree unknown no 253.00000
## 264 retired unknown divorced no 255.00000
## 265 retired unknown divorced yes 300.57143
## 266 retired unknown married no 172.53333
## 267 retired unknown married yes 404.28000
## 268 retired unknown single no 160.00000
## 269 retired unknown single yes 188.00000
## 270 self-employed basic.4y divorced no 274.40000
## 271 self-employed basic.4y married no 242.47297
## 272 self-employed basic.4y married yes 552.00000
## 273 self-employed basic.4y single no 227.00000
## 274 self-employed basic.6y divorced no 94.00000
## 275 self-employed basic.6y married no 219.44444
## 276 self-employed basic.6y married yes 734.00000
## 277 self-employed basic.6y single no 188.66667
## 278 self-employed basic.6y unknown no 683.00000
## 279 self-employed basic.9y divorced no 170.85000
## 280 self-employed basic.9y married no 214.35802
## 281 self-employed basic.9y married yes 1065.70588
## 282 self-employed basic.9y single no 203.15000
## 283 self-employed basic.9y single yes 104.00000
## 284 self-employed high.school divorced no 279.00000
## 285 self-employed high.school divorced yes 256.75000
## 286 self-employed high.school married no 209.97059
## 287 self-employed high.school married yes 497.75000
## 288 self-employed high.school single no 219.32353
## 289 self-employed illiterate married no 82.00000
## 290 self-employed illiterate married yes 488.00000
## 291 self-employed professional.course divorced no 299.33333
## 292 self-employed professional.course divorced yes 610.50000
## 293 self-employed professional.course married no 195.58163
## 294 self-employed professional.course married yes 763.61538
## 295 self-employed professional.course single no 166.88889
## 296 self-employed professional.course single yes 650.00000
## 297 self-employed professional.course unknown no 444.00000
## 298 self-employed university.degree divorced no 281.60345
## 299 self-employed university.degree divorced yes 454.20000
## 300 self-employed university.degree married no 220.50131
## 301 self-employed university.degree married yes 559.39535
## 302 self-employed university.degree single no 210.75771
## 303 self-employed university.degree single yes 567.06977
## 304 self-employed university.degree unknown no 221.00000
## 305 self-employed unknown divorced no 596.50000
## 306 self-employed unknown married no 268.11765
## 307 self-employed unknown single no 316.83333
## 308 self-employed unknown single yes 593.00000
## 309 services basic.4y divorced no 153.52000
## 310 services basic.4y divorced yes 1579.00000
## 311 services basic.4y married no 241.00000
## 312 services basic.4y married yes 648.20000
## 313 services basic.4y single no 275.11765
## 314 services basic.4y single yes 1777.00000
## 315 services basic.6y divorced no 281.85714
## 316 services basic.6y divorced yes 616.75000
## 317 services basic.6y married no 211.44000
## 318 services basic.6y married yes 789.75000
## 319 services basic.6y single no 271.86957
## 320 services basic.6y single yes 717.50000
## 321 services basic.9y divorced no 246.30769
## 322 services basic.9y divorced yes 599.50000
## 323 services basic.9y married no 217.84762
## 324 services basic.9y married yes 545.73684
## 325 services basic.9y single no 235.90722
## 326 services basic.9y single yes 636.00000
## 327 services high.school divorced no 230.75758
## 328 services high.school divorced yes 706.10526
## 329 services high.school married no 223.07543
## 330 services high.school married yes 680.14679
## 331 services high.school single no 233.85429
## 332 services high.school single yes 616.21333
## 333 services high.school unknown no 262.75000
## 334 services professional.course divorced no 246.33333
## 335 services professional.course married no 192.88696
## 336 services professional.course married yes 819.00000
## 337 services professional.course single no 216.25000
## 338 services professional.course single yes 428.22222
## 339 services professional.course unknown no 7.00000
## 340 services university.degree divorced no 179.86667
## 341 services university.degree divorced yes 494.60000
## 342 services university.degree married no 190.60606
## 343 services university.degree married yes 402.85714
## 344 services university.degree single no 204.75758
## 345 services university.degree single yes 388.00000
## 346 services unknown divorced no 223.46667
## 347 services unknown divorced yes 588.00000
## 348 services unknown married no 196.17857
## 349 services unknown married yes 648.00000
## 350 services unknown single no 276.41935
## 351 services unknown single yes 514.23077
## 352 services unknown unknown no 382.00000
## 353 student basic.4y single no 307.05556
## 354 student basic.4y single yes 359.87500
## 355 student basic.6y divorced no 467.00000
## 356 student basic.6y single no 375.40000
## 357 student basic.6y single yes 285.57143
## 358 student basic.9y married no 263.50000
## 359 student basic.9y single no 208.36066
## 360 student basic.9y single yes 429.17143
## 361 student basic.9y unknown no 155.00000
## 362 student high.school married no 178.33333
## 363 student high.school married yes 660.00000
## 364 student high.school single no 211.06494
## 365 student high.school single yes 426.58036
## 366 student professional.course divorced yes 1110.00000
## 367 student professional.course married no 237.00000
## 368 student professional.course married yes 188.50000
## 369 student professional.course single no 244.31818
## 370 student professional.course single yes 281.21429
## 371 student university.degree divorced no 86.50000
## 372 student university.degree divorced yes 453.00000
## 373 student university.degree married no 206.46154
## 374 student university.degree married yes 276.00000
## 375 student university.degree single no 252.17500
## 376 student university.degree single yes 395.24138
## 377 student unknown divorced no 173.66667
## 378 student unknown married no 89.50000
## 379 student unknown single no 242.82524
## 380 student unknown single yes 390.16949
## 381 technician basic.4y divorced no 373.00000
## 382 technician basic.4y divorced yes 420.66667
## 383 technician basic.4y married no 290.35897
## 384 technician basic.4y married yes 582.83333
## 385 technician basic.4y single no 238.33333
## 386 technician basic.6y divorced no 109.00000
## 387 technician basic.6y divorced yes 716.00000
## 388 technician basic.6y married no 217.38710
## 389 technician basic.6y married yes 510.75000
## 390 technician basic.6y single no 181.42857
## 391 technician basic.6y single yes 1448.00000
## 392 technician basic.6y unknown no 197.00000
## 393 technician basic.9y divorced no 187.60465
## 394 technician basic.9y divorced yes 853.00000
## 395 technician basic.9y married no 254.69604
## 396 technician basic.9y married yes 665.60000
## 397 technician basic.9y single no 181.02597
## 398 technician basic.9y single yes 361.63636
## 399 technician high.school divorced no 210.44545
## 400 technician high.school divorced yes 329.50000
## 401 technician high.school married no 228.18246
## 402 technician high.school married yes 559.51020
## 403 technician high.school single no 206.77075
## 404 technician high.school single yes 729.19231
## 405 technician high.school unknown no 285.66667
## 406 technician professional.course divorced no 201.47757
## 407 technician professional.course divorced yes 660.15152
## 408 technician professional.course married no 211.33570
## 409 technician professional.course married yes 549.06122
## 410 technician professional.course single no 216.05642
## 411 technician professional.course single yes 494.42105
## 412 technician professional.course unknown no 245.00000
## 413 technician university.degree divorced no 181.15038
## 414 technician university.degree divorced yes 666.76471
## 415 technician university.degree married no 201.99327
## 416 technician university.degree married yes 562.07059
## 417 technician university.degree single no 219.51348
## 418 technician university.degree single yes 525.42149
## 419 technician university.degree unknown no 209.00000
## 420 technician university.degree unknown yes 575.00000
## 421 technician unknown divorced no 182.54545
## 422 technician unknown married no 223.92079
## 423 technician unknown married yes 593.26316
## 424 technician unknown single no 214.26923
## 425 technician unknown single yes 474.66667
## 426 technician unknown unknown no 667.00000
## 427 unemployed basic.4y divorced no 222.90909
## 428 unemployed basic.4y divorced yes 419.60000
## 429 unemployed basic.4y married no 218.21429
## 430 unemployed basic.4y married yes 688.12500
## 431 unemployed basic.4y single no 265.03448
## 432 unemployed basic.4y single yes 338.00000
## 433 unemployed basic.6y divorced no 171.50000
## 434 unemployed basic.6y married no 174.16667
## 435 unemployed basic.6y married yes 834.75000
## 436 unemployed basic.6y single no 114.00000
## 437 unemployed basic.9y divorced no 238.28571
## 438 unemployed basic.9y divorced yes 915.00000
## 439 unemployed basic.9y married no 225.19492
## 440 unemployed basic.9y married yes 400.88889
## 441 unemployed basic.9y single no 263.36842
## 442 unemployed basic.9y single yes 342.16667
## 443 unemployed basic.9y unknown no 208.50000
## 444 unemployed high.school divorced no 177.82500
## 445 unemployed high.school divorced yes 715.00000
## 446 unemployed high.school married no 206.69466
## 447 unemployed high.school married yes 449.00000
## 448 unemployed high.school single no 193.64151
## 449 unemployed high.school single yes 569.75000
## 450 unemployed high.school unknown no 73.00000
## 451 unemployed professional.course divorced no 295.94737
## 452 unemployed professional.course married no 209.45238
## 453 unemployed professional.course married yes 620.50000
## 454 unemployed professional.course single no 216.10526
## 455 unemployed professional.course single yes 420.50000
## 456 unemployed university.degree divorced no 198.95000
## 457 unemployed university.degree divorced yes 792.50000
## 458 unemployed university.degree married no 228.37600
## 459 unemployed university.degree married yes 363.42857
## 460 unemployed university.degree single no 169.64474
## 461 unemployed university.degree single yes 300.26087
## 462 unemployed university.degree unknown no 133.00000
## 463 unemployed unknown divorced no 227.00000
## 464 unemployed unknown married no 247.20000
## 465 unemployed unknown married yes 256.33333
## 466 unemployed unknown single no 162.33333
## 467 unemployed unknown single yes 747.50000
## 468 unknown basic.4y divorced no 238.00000
## 469 unknown basic.4y married no 198.52632
## 470 unknown basic.4y married yes 687.75000
## 471 unknown basic.4y single no 232.14286
## 472 unknown basic.6y married no 227.35294
## 473 unknown basic.6y single no 522.75000
## 474 unknown basic.6y single yes 878.00000
## 475 unknown basic.9y married no 233.80000
## 476 unknown basic.9y married yes 154.00000
## 477 unknown basic.9y single no 195.86667
## 478 unknown high.school divorced no 92.50000
## 479 unknown high.school married no 191.77273
## 480 unknown high.school single no 309.33333
## 481 unknown high.school single yes 458.00000
## 482 unknown professional.course married no 221.11111
## 483 unknown professional.course single no 84.50000
## 484 unknown professional.course single yes 2029.00000
## 485 unknown university.degree divorced no 106.00000
## 486 unknown university.degree married no 222.57895
## 487 unknown university.degree married yes 164.00000
## 488 unknown university.degree single no 182.90909
## 489 unknown university.degree single yes 633.66667
## 490 unknown university.degree unknown no 99.00000
## 491 unknown university.degree unknown yes 617.00000
## 492 unknown unknown divorced no 135.50000
## 493 unknown unknown divorced yes 214.66667
## 494 unknown unknown married no 177.66327
## 495 unknown unknown married yes 279.40000
## 496 unknown unknown single no 311.27273
## 497 unknown unknown single yes 712.50000
## 498 unknown unknown unknown no 123.33333
## 499 unknown unknown unknown yes 541.00000
# Decision Trees - Classification - Prediction
library(caret)
## Loading required package: lattice
# Fix the RNG state before the random split so that the same rows land in
# train/test on every run (the split was previously different on each run).
set.seed(1)
# 70/30 split of the rows, sampled within the levels of `response`.
inTrain <- createDataPartition(y = clean_data$response, p = 0.7, list = FALSE)
train_data <- clean_data[inTrain, ]
nrow(train_data)
## [1] 28832
test_data <- clean_data[-inTrain, ]
nrow(test_data)
## [1] 12356
# Class counts on the full data set: "yes" is the minority class.
table(clean_data$response)
##
## no yes
## 36548 4640
library(ROSE)
## Loaded ROSE 0.0-3
nrow(train_data) / 2
## [1] 14416
# Rebalance the training rows only (the test rows are left as they are).
# NOTE: despite the variable name, method = "both" combines over-sampling of
# the minority class with under-sampling of the majority class; N keeps the
# row count equal to nrow(train_data) and p = 0.5 targets an even class mix.
undersampled.train_data <- ovun.sample(
  response ~ .,
  data = train_data,
  N = nrow(train_data),
  p = 0.5,
  seed = 1,
  method = "both"
)$data
table(undersampled.train_data$response)
##
## no yes
## 14517 14315
library(rpart)
library(rpart.plot)
library(rattle)
## Rattle: A free graphical interface for data mining with R.
## Version 4.1.0 Copyright (c) 2006-2015 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
# Fit a classification tree on the resampled training data, supplying class
# priors through `parms` and a complexity threshold of cp = 0.001.
tree_undersample <- rpart(
  formula = response ~ .,
  data = undersampled.train_data,
  method = "class",
  parms = list(prior = c(0.75, 0.25)),
  control = rpart.control(cp = 0.001)
)
fancyRpartPlot(tree_undersample)
## Warning: labs do not fit even at cex 0.15, there may be some overplotting
# Row of the complexity table with the smallest cross-validated error
index <- which.min(tree_undersample$cptable[, "xerror"])
# Complexity parameter recorded in that row
tree_min <- tree_undersample$cptable[, "CP"][index]
# Cut the tree back at that complexity parameter
ptree_prior <- prune(tree_undersample, cp = tree_min)
# Draw the pruned tree, first with prp() and then with fancyRpartPlot()
prp(ptree_prior)
fancyRpartPlot(ptree_prior)
## Warning: labs do not fit even at cex 0.15, there may be some overplotting
# Classify the hold-out rows with the fitted tree.
# NOTE(review): this scores the unpruned `tree_undersample`; the pruned
# `ptree_prior` is built earlier but never used for prediction -- confirm
# that this is intended.
pred_undersample <- predict(tree_undersample, newdata = test_data, type = "class")
# Cross-tabulation with the actual class in rows and the predicted class in
# columns.
confmat_undersample <- table(test_data$response, pred_undersample)
confmat_undersample
## pred_undersample
## no yes
## no 9964 1000
## yes 287 1105
# confusionMatrix() expects the predicted classes as `data` and the true
# classes as `reference`. The arguments are named here so that the
# "Prediction"/"Reference" axes and the per-class statistics are reported the
# right way round, matching the GBM evaluation further down.
confmat <- confusionMatrix(data = pred_undersample, reference = test_data$response)
confmat
# The transcript previously pasted here was produced with the two arguments
# swapped, so its per-class statistics were transposed; re-run to refresh it.
# From the table above, with 'no' as the positive class:
#   Sensitivity    = 9964 / 10964 = 0.9088
#   Specificity    = 1105 / 1392  = 0.7938
#   Pos Pred Value = 9964 / 10251 = 0.9720
#   Neg Pred Value = 1105 / 2105  = 0.5249
# Accuracy (0.8958) and Kappa (0.5742) are unaffected by the swap.
# Overall accuracy: correctly classified rows (the diagonal) over all test rows.
acc_undersample <- sum(diag(confmat_undersample)) / nrow(test_data)
acc_undersample
## [1] 0.8958401
# Predicting response with a gradient boosting machine (GBM)
# Resampling scheme handed to train(): 5-fold cross-validation
trctrl <- trainControl(method = "cv", number = 5)
gbm_model <- train(
  response ~ .,
  data = train_data,
  method = "gbm",
  trControl = trctrl,
  verbose = FALSE
)
## Loading required package: gbm
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
##
## cluster
## Loading required package: splines
## Loading required package: parallel
## Loaded gbm 2.1.1
## Loading required package: plyr
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
# Score the hold-out rows and compare the predictions with the true classes
gbm_pred <- predict(gbm_model, newdata = test_data)
confusionMatrix(data = gbm_pred, reference = test_data$response)
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 10606 680
## yes 358 712
##
## Accuracy : 0.916
## 95% CI : (0.911, 0.9208)
## No Information Rate : 0.8873
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5326
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9673
## Specificity : 0.5115
## Pos Pred Value : 0.9397
## Neg Pred Value : 0.6654
## Prevalence : 0.8873
## Detection Rate : 0.8584
## Detection Prevalence : 0.9134
## Balanced Accuracy : 0.7394
##
## 'Positive' Class : no
##
# Operating with RANDOM FORESTS
#rf_model = train(response~., data = train_data, method = "rf", ntree = 10, trControl = trctrl)
#rf_pred = predict(rf_model,test_data)
#confusionMatrix(rf_pred,test_data$response)
library(survival)
#clean_data$pdays=ifelse(clean_data$pdays =='999', 0,clean_data$pdays)
#attach(clean_data)
# Kaplan-Meier curves that use `campaign` as the time axis and a "yes"
# response as the event, first overall and then split by `poutcome`.
# Everything assigned inside with() stays local to this block.
with(clean_data, {
  # Go through as.character() so that the recorded values are used even if
  # the column is stored as a factor or as text (factor codes would be wrong).
  time <- as.numeric(as.character(campaign))
  # Explicit 0/1 event indicator (1 = "yes"). The earlier as.numeric() on the
  # relevelled factor produced the codes 1/2 rather than 0/1.
  event <- as.integer(response == "yes")
  group <- poutcome

  # Descriptive statistics.
  # print() is required: values inside a `{ }` block are not auto-printed.
  print(summary(time))
  print(summary(event))

  # Kaplan-Meier non-parametric analysis, all rows together
  kmsurvival <- survfit(Surv(time, event) ~ 1)
  print(summary(kmsurvival))
  plot(kmsurvival, xlab = "campaign", conf.int = FALSE,
       ylab = "Survival Probability")
  title("Campaign vs. Response")

  # Kaplan-Meier curves per previous-campaign outcome
  kmsurvival1 <- survfit(Surv(time, event) ~ group)
  print(summary(kmsurvival1))
  curve_cols <- c("blue", "red", "green")
  plot(kmsurvival1, xlab = "campaign", col = curve_cols,
       ylab = "Survival Probability by poutcome", mark.time = TRUE)
  # Take the legend labels from the fitted strata so that their spelling and
  # order always match the plotted curves.
  strata_labels <- sub("^group=", "", names(kmsurvival1$strata))
  legend("bottom", legend = strata_labels, col = curve_cols, lty = 1)
  title("Campaign, Response group by poutcome")
})