# Packages used below: readxl (read_excel), dplyr (%>%), ggplot2 (plots),
# car (vif), caret (confusionMatrix), ROCR (prediction/performance), e1071 (svm)
library(readxl); library(dplyr); library(ggplot2)
library(car); library(caret); library(ROCR); library(e1071)
book_train <- read_excel("BBBC-Train.xlsx")
book_test <- read_excel("BBBC-Test.xlsx")
book_train <- book_train[, -1]  # drop the first column (row identifier)
book_test <- book_test[, -1]
str(book_train)
## tibble [1,600 x 11] (S3: tbl_df/tbl/data.frame)
## $ Choice : num [1:1600] 1 1 1 1 1 1 1 1 1 1 ...
## $ Gender : num [1:1600] 1 1 1 1 0 1 1 0 1 1 ...
## $ Amount_purchased: num [1:1600] 113 418 336 180 320 268 198 280 393 138 ...
## $ Frequency : num [1:1600] 8 6 18 16 2 4 2 6 12 10 ...
## $ Last_purchase : num [1:1600] 1 11 6 5 3 1 12 2 11 7 ...
## $ First_purchase : num [1:1600] 8 66 32 42 18 4 62 12 50 38 ...
## $ P_Child : num [1:1600] 0 0 2 2 0 0 2 0 3 2 ...
## $ P_Youth : num [1:1600] 1 2 0 0 0 0 3 2 0 3 ...
## $ P_Cook : num [1:1600] 0 3 1 0 0 0 2 0 3 0 ...
## $ P_DIY : num [1:1600] 0 2 1 1 1 0 1 0 0 0 ...
## $ P_Art : num [1:1600] 0 3 2 1 2 0 2 0 2 1 ...
pairs(book_train)
cor(book_train)
## Choice Gender Amount_purchased Frequency
## Choice 1.000000000 -0.141558415 0.11815256 -0.2260181193
## Gender -0.141558415 1.000000000 -0.03060700 0.0321704951
## Amount_purchased 0.118152563 -0.030607000 1.00000000 0.0136664846
## Frequency -0.226018119 0.032170495 0.01366648 1.0000000000
## Last_purchase 0.141437015 -0.028963412 0.44070127 -0.0419432803
## First_purchase 0.003157481 0.001026138 0.37481393 0.4459457457
## P_Child 0.008523377 -0.041475936 0.29931372 -0.0433279437
## P_Youth 0.027608101 -0.014130306 0.18755727 -0.0095854745
## P_Cook -0.040256351 -0.026673876 0.30425340 0.0004968833
## P_DIY -0.005309265 -0.025946174 0.22331539 -0.0089634125
## P_Art 0.357688817 -0.003500037 0.27248948 -0.0613754066
## Last_purchase First_purchase P_Child P_Youth
## Choice 0.14143702 0.003157481 0.008523377 0.027608101
## Gender -0.02896341 0.001026138 -0.041475936 -0.014130306
## Amount_purchased 0.44070127 0.374813928 0.299313719 0.187557270
## Frequency -0.04194328 0.445945746 -0.043327944 -0.009585474
## Last_purchase 1.00000000 0.814674687 0.679133923 0.453258910
## First_purchase 0.81467469 1.000000000 0.544820825 0.367892128
## P_Child 0.67913392 0.544820825 1.000000000 0.174826719
## P_Youth 0.45325891 0.367892128 0.174826719 1.000000000
## P_Cook 0.67250539 0.571054792 0.294706519 0.181656640
## P_DIY 0.55816739 0.462018843 0.253837077 0.188683456
## P_Art 0.53433415 0.442082061 0.224512850 0.141751220
## P_Cook P_DIY P_Art
## Choice -0.0402563507 -0.005309265 0.357688817
## Gender -0.0266738763 -0.025946174 -0.003500037
## Amount_purchased 0.3042533969 0.223315392 0.272489483
## Frequency 0.0004968833 -0.008963412 -0.061375407
## Last_purchase 0.6725053933 0.558167395 0.534334145
## First_purchase 0.5710547918 0.462018843 0.442082061
## P_Child 0.2947065185 0.253837077 0.224512850
## P_Youth 0.1816566401 0.188683456 0.141751220
## P_Cook 1.0000000000 0.271725126 0.191680761
## P_DIY 0.2717251256 1.000000000 0.207791065
## P_Art 0.1916807611 0.207791065 1.000000000
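Comments: To scan this matrix for the strongest pairwise relationships without reading all 55 entries, the sketch below (written against the book_train tibble loaded above, with the |r| > 0.5 cutoff and the names cm and strong chosen purely for illustration) lists the highly correlated pairs.
cm <- cor(book_train)
cm[upper.tri(cm, diag = TRUE)] <- NA  # keep each pair only once
strong <- which(abs(cm) > 0.5, arr.ind = TRUE)
data.frame(var1 = rownames(cm)[strong[, 1]],
           var2 = colnames(cm)[strong[, 2]],
           r = round(cm[strong], 3))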
book_train = na.omit(book_train)  # drop any rows with missing values
book_test = na.omit(book_test)
book_train %>%
  ggplot(aes(x = factor(ifelse(Choice == 1, "Book Purchased", "No Purchase")),
             fill = factor(ifelse(Gender == 0, "Female", "Male")))) +
  geom_bar(alpha = 0.8) +
  stat_count(geom = "text", colour = "black", size = 3.5,
             aes(label = paste("n =", after_stat(count))),
             position = position_stack(vjust = 0.5)) +
  labs(title = "Choice of Book Purchase by Gender", x = "", y = "", fill = "Gender")
book_lm1 = lm(Choice ~ ., data = book_train)
summary(book_lm1)
##
## Call:
## lm(formula = Choice ~ ., data = book_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9603 -0.2462 -0.1161 0.1622 1.0588
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3642284 0.0307411 11.848 < 2e-16 ***
## Gender -0.1309205 0.0200303 -6.536 8.48e-11 ***
## Amount_purchased 0.0002736 0.0001110 2.464 0.0138 *
## Frequency -0.0090868 0.0021791 -4.170 3.21e-05 ***
## Last_purchase 0.0970286 0.0135589 7.156 1.26e-12 ***
## First_purchase -0.0020024 0.0018160 -1.103 0.2704
## P_Child -0.1262584 0.0164011 -7.698 2.41e-14 ***
## P_Youth -0.0963563 0.0201097 -4.792 1.81e-06 ***
## P_Cook -0.1414907 0.0166064 -8.520 < 2e-16 ***
## P_DIY -0.1352313 0.0197873 -6.834 1.17e-11 ***
## P_Art 0.1178494 0.0194427 6.061 1.68e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3788 on 1589 degrees of freedom
## Multiple R-squared: 0.2401, Adjusted R-squared: 0.2353
## F-statistic: 50.2 on 10 and 1589 DF, p-value: < 2.2e-16
vif(book_lm1)
## Gender Amount_purchased Frequency Last_purchase
## 1.005801 1.248066 3.253860 18.770402
## First_purchase P_Child P_Youth P_Cook
## 9.685333 3.360349 1.775022 3.324928
## P_DIY P_Art
## 2.016910 2.273771
book_lm2 = lm(Choice ~ . - Last_purchase, data = book_train)
summary(book_lm2)
##
## Call:
## lm(formula = Choice ~ . - Last_purchase, data = book_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0018 -0.2482 -0.1277 0.1567 1.1035
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3926595 0.0309609 12.682 < 2e-16 ***
## Gender -0.1290720 0.0203424 -6.345 2.89e-10 ***
## Amount_purchased 0.0003518 0.0001122 3.135 0.001753 **
## Frequency -0.0157943 0.0019980 -7.905 4.97e-15 ***
## First_purchase 0.0046036 0.0015884 2.898 0.003803 **
## P_Child -0.0502183 0.0126891 -3.958 7.90e-05 ***
## P_Youth -0.0225339 0.0175326 -1.285 0.198888
## P_Cook -0.0667467 0.0131127 -5.090 4.00e-07 ***
## P_DIY -0.0606486 0.0170835 -3.550 0.000396 ***
## P_Art 0.1916012 0.0167447 11.443 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3847 on 1590 degrees of freedom
## Multiple R-squared: 0.2156, Adjusted R-squared: 0.2111
## F-statistic: 48.55 on 9 and 1590 DF, p-value: < 2.2e-16
vif(book_lm2)
## Gender Amount_purchased Frequency First_purchase
## 1.005634 1.235982 2.651820 7.182666
## P_Child P_Youth P_Cook P_DIY
## 1.949849 1.307915 2.009609 1.457362
## P_Art
## 1.634878
book_lm3 = lm(Choice ~ . - Last_purchase - First_purchase, data = book_train)
summary(book_lm3)
##
## Call:
## lm(formula = Choice ~ . - Last_purchase - First_purchase, data = book_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9501 -0.2518 -0.1273 0.1509 1.1211
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3731865 0.0302933 12.319 < 2e-16 ***
## Gender -0.1263728 0.0203683 -6.204 6.99e-10 ***
## Amount_purchased 0.0003688 0.0001123 3.283 0.00105 **
## Frequency -0.0112345 0.0012344 -9.101 < 2e-16 ***
## P_Child -0.0275983 0.0100284 -2.752 0.00599 **
## P_Youth -0.0014841 0.0159946 -0.093 0.92609
## P_Cook -0.0428346 0.0102155 -4.193 2.90e-05 ***
## P_DIY -0.0384262 0.0153017 -2.511 0.01213 *
## P_Art 0.2183323 0.0140081 15.586 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3856 on 1591 degrees of freedom
## Multiple R-squared: 0.2114, Adjusted R-squared: 0.2075
## F-statistic: 53.32 on 8 and 1591 DF, p-value: < 2.2e-16
vif(book_lm3)
## Gender Amount_purchased Frequency P_Child
## 1.003526 1.232595 1.007587 1.212223
## P_Youth P_Cook P_DIY P_Art
## 1.083475 1.214043 1.163794 1.138879
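Comments: The manual pruning above (fit, check VIF, drop the worst offender, refit) can be codified; the sketch below, with the helper names fit and worst introduced here and a cutoff of 5 as one common rule of thumb, should reproduce the same reduction to book_lm3's predictor set.
fit <- lm(Choice ~ ., data = book_train)
while (max(car::vif(fit)) > 5) {
  worst <- names(which.max(car::vif(fit)))  # highest-VIF predictor
  fit <- update(fit, as.formula(paste(". ~ . -", worst)))
}
car::vif(fit)  # drops Last_purchase, then First_purchase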
par(mfrow = c(2,2))
plot(book_lm3)
Comments: The residual plots show the two parallel bands characteristic of a binary (0/1) response rather than the scatter expected under normality, so the normal-errors assumption of linear regression is violated and Choice is better modeled as a binomial outcome.
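Comments: A related drawback of the linear probability model is that its fitted values are not confined to [0, 1]; a quick check on the test set (using book_lm3 from above) makes this concrete.
range(predict(book_lm3, newdata = book_test))  # typically extends below 0 and/or above 1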
# Fitted values from the linear probability model (not true, bounded probabilities)
book_test$PredProb = predict(book_lm3, newdata = book_test, type = "response")
book_test$Choice = as.factor(book_test$Choice)
book_test$PredChoice = ifelse(book_test$PredProb >= 0.5, 1, 0)
table(book_test$PredChoice)
##
## 0 1
## 2146 154
# Note: caret expects confusionMatrix(data = predictions, reference = truth); the truth
# is passed first here, so the Prediction/Reference axes below are transposed.
caret::confusionMatrix(book_test$Choice, as.factor(book_test$PredChoice))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 2005 91
## 1 141 63
##
## Accuracy : 0.8991
## 95% CI : (0.8861, 0.9111)
## No Information Rate : 0.933
## P-Value [Acc > NIR] : 1.000000
##
## Kappa : 0.2984
##
## Mcnemar's Test P-Value : 0.001295
##
## Sensitivity : 0.9343
## Specificity : 0.4091
## Pos Pred Value : 0.9566
## Neg Pred Value : 0.3088
## Prevalence : 0.9330
## Detection Rate : 0.8717
## Detection Prevalence : 0.9113
## Balanced Accuracy : 0.6717
##
## 'Positive' Class : 0
##
book_logr = glm(Choice ~ ., data = book_train, family = binomial)
summary(book_logr)
##
## Call:
## glm(formula = Choice ~ ., family = binomial, data = book_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.38586 -0.66728 -0.43696 -0.02242 2.72238
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3515281 0.2143839 -1.640 0.1011
## Gender -0.8632319 0.1374499 -6.280 3.38e-10 ***
## Amount_purchased 0.0018641 0.0007918 2.354 0.0186 *
## Frequency -0.0755142 0.0165937 -4.551 5.35e-06 ***
## Last_purchase 0.6117713 0.0938127 6.521 6.97e-11 ***
## First_purchase -0.0147792 0.0128027 -1.154 0.2483
## P_Child -0.8112489 0.1167067 -6.951 3.62e-12 ***
## P_Youth -0.6370422 0.1433778 -4.443 8.87e-06 ***
## P_Cook -0.9230066 0.1194814 -7.725 1.12e-14 ***
## P_DIY -0.9058697 0.1437025 -6.304 2.90e-10 ***
## P_Art 0.6861124 0.1270176 5.402 6.60e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1799.5 on 1599 degrees of freedom
## Residual deviance: 1392.2 on 1589 degrees of freedom
## AIC: 1414.2
##
## Number of Fisher Scoring iterations: 5
vif(book_logr)
## Gender Amount_purchased Frequency Last_purchase
## 1.023359 1.232172 2.490447 17.706670
## First_purchase P_Child P_Youth P_Cook
## 9.247748 2.992269 1.761546 3.229097
## P_DIY P_Art
## 1.992698 1.938089
book_logr1 = glm(Choice ~ . - Last_purchase, data = book_train, family = binomial)
summary(book_logr1)
##
## Call:
## glm(formula = Choice ~ . - Last_purchase, family = binomial,
## data = book_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.46171 -0.68074 -0.46620 -0.00855 2.80519
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1489829 0.2095375 -0.711 0.477079
## Gender -0.8302649 0.1350384 -6.148 7.83e-10 ***
## Amount_purchased 0.0022691 0.0007747 2.929 0.003399 **
## Frequency -0.1194992 0.0152620 -7.830 4.89e-15 ***
## First_purchase 0.0306235 0.0108454 2.824 0.004748 **
## P_Child -0.3456948 0.0908420 -3.805 0.000142 ***
## P_Youth -0.1789417 0.1226235 -1.459 0.144489
## P_Cook -0.4578299 0.0950443 -4.817 1.46e-06 ***
## P_DIY -0.4265209 0.1209960 -3.525 0.000423 ***
## P_Art 1.0778036 0.1144995 9.413 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1799.5 on 1599 degrees of freedom
## Residual deviance: 1437.0 on 1590 degrees of freedom
## AIC: 1457
##
## Number of Fisher Scoring iterations: 5
vif(book_logr1)
## Gender Amount_purchased Frequency First_purchase
## 1.021977 1.220305 2.173240 6.886806
## P_Child P_Youth P_Cook P_DIY
## 1.904631 1.320305 2.060140 1.462770
## P_Art
## 1.603865
book_logr2 = glm(Choice ~ . - Last_purchase - First_purchase, data = book_train, family = binomial)
summary(book_logr2)
##
## Call:
## glm(formula = Choice ~ . - Last_purchase - First_purchase, family = binomial,
## data = book_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.31846 -0.69097 -0.47171 -0.02488 2.84182
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.286380 0.202966 -1.411 0.15825
## Gender -0.811948 0.134579 -6.033 1.61e-09 ***
## Amount_purchased 0.002406 0.000771 3.120 0.00181 **
## Frequency -0.088625 0.010385 -8.534 < 2e-16 ***
## P_Child -0.194796 0.072207 -2.698 0.00698 **
## P_Youth -0.031928 0.109605 -0.291 0.77082
## P_Cook -0.292392 0.072998 -4.005 6.19e-05 ***
## P_DIY -0.279282 0.108094 -2.584 0.00977 **
## P_Art 1.245842 0.099062 12.576 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1799.5 on 1599 degrees of freedom
## Residual deviance: 1445.0 on 1591 degrees of freedom
## AIC: 1463
##
## Number of Fisher Scoring iterations: 5
vif(book_logr2)
## Gender Amount_purchased Frequency P_Child
## 1.020217 1.213528 1.015899 1.215500
## P_Youth P_Cook P_DIY P_Art
## 1.081019 1.228798 1.179821 1.229491
Comments: With Last_purchase and First_purchase removed, every VIF is close to 1, so multicollinearity is no longer a concern and the remaining predictors can all be retained in the final model.
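Comments: For interpretation, the logistic coefficients can be exponentiated into odds ratios; a minimal sketch for the final model, using Wald confidence intervals from confint.default.
exp(cbind(OR = coef(book_logr2), confint.default(book_logr2)))  # odds ratios with 95% CIs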
book_test$prob.logr <- predict.glm(book_logr2, newdata = book_test, type="response")
book_test$PredChoice_logr = ifelse(book_test$prob.logr >= 0.5, 1, 0)
table(book_test$PredChoice_logr)
##
## 0 1
## 2116 184
confusionMatrix(as.factor(book_test$PredChoice_logr), as.factor(book_test$Choice), positive = '1')
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 1984 132
## 1 112 72
##
## Accuracy : 0.8939
## 95% CI : (0.8806, 0.9062)
## No Information Rate : 0.9113
## P-Value [Acc > NIR] : 0.9981
##
## Kappa : 0.3134
##
## Mcnemar's Test P-Value : 0.2239
##
## Sensitivity : 0.3529
## Specificity : 0.9466
## Pos Pred Value : 0.3913
## Neg Pred Value : 0.9376
## Prevalence : 0.0887
## Detection Rate : 0.0313
## Detection Prevalence : 0.0800
## Balanced Accuracy : 0.6498
##
## 'Positive' Class : 1
##
PredProb1 = prediction(predict.glm(book_logr2, newdata = book_test, type = "response"), book_test$Choice)
# Computing threshold for cutoff to best trade off sensitivity and specificity
plot(unlist(performance(PredProb1, 'sens')@x.values), unlist(performance(PredProb1, 'sens')@y.values),
     type = 'l', lwd = 2, ylab = "", xlab = 'Cutoff')
mtext('Sensitivity',side=2)
mtext('Sensitivity vs. Specificity Plot for AIC Model', side=3)
# Second specificity in same plot
par(new=TRUE)
plot(unlist(performance(PredProb1, 'spec')@x.values), unlist(performance(PredProb1, 'spec')@y.values),
     type = 'l', lwd = 2, col = 'red', ylab = "", xlab = 'Cutoff')
axis(4,at=seq(0,1,0.2))
mtext('Specificity',side=4, col='red')
par(new=TRUE)
min.diff <- which.min(abs(unlist(performance(PredProb1, "sens")@y.values) -
                            unlist(performance(PredProb1, "spec")@y.values)))
min.x <- unlist(performance(PredProb1, "sens")@x.values)[min.diff]
min.y <- unlist(performance(PredProb1, "spec")@y.values)[min.diff]
optimal <- min.x  # cutoff where the sensitivity and specificity curves cross
abline(h = min.y, lty = 3)
abline(v = min.x, lty = 3)
text(min.x,0,paste("optimal threshold=",round(optimal,5)), pos = 4)
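Comments: The same ROCR prediction object also yields the area under the ROC curve, a threshold-free summary of how well the model ranks buyers above non-buyers; a minimal sketch (auc is a name introduced here).
auc <- performance(PredProb1, "auc")
unlist(auc@y.values)  # AUC for the final logistic model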
# Reclassify at the optimal cutoff (~0.23) identified above
book_test$PredChoice_logr_ss = ifelse(book_test$prob.logr >= 0.23, 1, 0)
table(book_test$PredChoice_logr_ss)
##
## 0 1
## 1547 753
confusionMatrix(as.factor(book_test$PredChoice_logr_ss), as.factor(book_test$Choice), positive = '1')
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 1490 57
## 1 606 147
##
## Accuracy : 0.7117
## 95% CI : (0.6927, 0.7302)
## No Information Rate : 0.9113
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1948
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.72059
## Specificity : 0.71088
## Pos Pred Value : 0.19522
## Neg Pred Value : 0.96315
## Prevalence : 0.08870
## Detection Rate : 0.06391
## Detection Prevalence : 0.32739
## Balanced Accuracy : 0.71573
##
## 'Positive' Class : 1
##
# Convert the response (and Gender) to factors so svm() performs classification
book_train$Gender = as.factor(book_train$Gender)
book_train$Choice = as.factor(book_train$Choice)
book_test$Choice = as.factor(book_test$Choice)
book_test$Gender = as.factor(book_test$Gender)
set.seed(10)
form1 = Choice ~ .
# Grid search over gamma and cost (tune.svm cross-validates, 10-fold by default)
tuned = tune.svm(form1, data = book_train,
                 gamma = seq(0.001, 0.01, by = 0.005),
                 cost = seq(0.1, 1, by = 0.1))
tuned$best.parameters
book_svm = svm(formula = form1, data = book_train, gamma = tuned$best.parameters$gamma, cost = tuned$best.parameters$cost)
summary(book_svm)
##
## Call:
## svm(formula = form1, data = book_train, gamma = tuned$best.parameters$gamma,
## cost = tuned$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 787
##
## ( 391 396 )
##
##
## Number of Classes: 2
##
## Levels:
## 0 1
# predict.svm returns class labels for a classification fit (a type argument is ignored)
svmpredict = predict(book_svm, newdata = book_test)
caret::confusionMatrix(as.factor(svmpredict), book_test$Choice, positive = "1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 2062 173
## 1 34 31
##
## Accuracy : 0.91
## 95% CI : (0.8976, 0.9214)
## No Information Rate : 0.9113
## P-Value [Acc > NIR] : 0.6049
##
## Kappa : 0.196
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.15196
## Specificity : 0.98378
## Pos Pred Value : 0.47692
## Neg Pred Value : 0.92260
## Prevalence : 0.08870
## Detection Rate : 0.01348
## Detection Prevalence : 0.02826
## Balanced Accuracy : 0.56787
##
## 'Positive' Class : 1
##
tuned_linear = tune.svm(form1, data = book_train,
gamma = seq(0.001, 0.01, by = 0.005),
cost = seq(0.1, 1, by = 0.1),
kernel = "linear")
tuned_linear$best.parameters
# Note: kernel = "linear" must also be passed to svm() itself; it is omitted here,
# so svm() falls back to its radial default, as the summary below confirms.
book_svm_linear = svm(formula = form1, data = book_train,
                      gamma = tuned_linear$best.parameters$gamma,
                      cost = tuned_linear$best.parameters$cost)
summary(book_svm_linear)
##
## Call:
## svm(formula = form1, data = book_train, gamma = tuned_linear$best.parameters$gamma,
## cost = tuned_linear$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.2
##
## Number of Support Vectors: 803
##
## ( 400 403 )
##
##
## Number of Classes: 2
##
## Levels:
## 0 1
svmpredict_linear = predict(book_svm_linear, newdata = book_test)
# Truth and predictions are again passed in reverse order here; note also that this
# model predicts class 0 for every test customer (no purchases at all).
confusionMatrix(book_test$Choice, as.factor(svmpredict_linear), positive = "1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 2096 0
## 1 204 0
##
## Accuracy : 0.9113
## 95% CI : (0.8989, 0.9226)
## No Information Rate : 1
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : NA
## Specificity : 0.9113
## Pos Pred Value : NA
## Neg Pred Value : NA
## Prevalence : 0.0000
## Detection Rate : 0.0000
## Detection Prevalence : 0.0887
## Balanced Accuracy : NA
##
## 'Positive' Class : 1
##
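Comments: For reference, the intended linear fit would pass the kernel to svm() explicitly, as sketched below with the hypothetical name book_svm_linear2 (gamma plays no role for a linear kernel); results are not shown since the run above used the radial default.
book_svm_linear2 <- svm(formula = form1, data = book_train, kernel = "linear",
                        cost = tuned_linear$best.parameters$cost)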
nr_customers_tot = 50000      # remaining customers who could be targeted
nr_test_customers = 2300      # size of the holdout test set
book.purchase.cost = 15       # cost to purchase each book
book.Overhead.perc = 0.45     # overhead, as a fraction of purchase cost
book.price = 31.95            # selling price per book
book.mailing.price = 0.65     # cost to mail one offer
# Per-book profit = price - purchase cost - overhead on purchase cost
book.profit = book.price - book.purchase.cost - book.purchase.cost*book.Overhead.perc
book.profit
## [1] 10.2
book_order_perc = 0.0903  # 9.03% response rate from the traditional mass mailing to 20,000 customers
(profit_entire_customers = nr_customers_tot*book_order_perc*book.profit - nr_customers_tot*(1 - book_order_perc)*book.mailing.price)  # profit from mailing everyone
## [1] 16487.75
pos_pred_value = 0.19522  # precision of the logistic model at the 0.23 cutoff
(logreg_purchase_perc = (606 + 147)/nr_test_customers)  # share of test customers flagged as buyers (detection prevalence)
## [1] 0.3273913
(nr_customers_logreg = round(logreg_purchase_perc*nr_customers_tot, 0))  # customers targeted using Logistic Regression
## [1] 16370
(profit_log_reg = nr_customers_logreg*pos_pred_value*book.profit - nr_customers_logreg*(1-pos_pred_value)*book.mailing.price) #profit from LR model
## [1] 24033.4
pos_pred_value_svm = 0.47692  # precision of the radial SVM at the default cutoff
(svm_purchase_perc = (34 + 31)/nr_test_customers)  # share of test customers flagged as buyers by the SVM
## [1] 0.02826087
(nr_customers_svm = round(svm_purchase_perc*nr_customers_tot, 0))  # customers targeted using the SVM
## [1] 1413
(profit_svm = nr_customers_svm*pos_pred_value_svm*book.profit - nr_customers_svm*(1-pos_pred_value_svm)*book.mailing.price) #profit from SVM Model
## [1] 6393.234
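Comments: All three scenarios apply the same formula, profit = n_mailed * hit_rate * profit_per_book - n_mailed * (1 - hit_rate) * mailing_cost; the small helper below (campaign_profit is a name introduced here) makes the comparison explicit and reproduces the three figures above.
campaign_profit <- function(n_mailed, hit_rate,
                            profit_per_book = book.profit,
                            mail_cost = book.mailing.price) {
  # profit from buyers reached minus mailing cost wasted on non-buyers
  n_mailed * hit_rate * profit_per_book - n_mailed * (1 - hit_rate) * mail_cost
}
campaign_profit(nr_customers_tot, book_order_perc)     # mail everyone: 16487.75
campaign_profit(nr_customers_logreg, pos_pred_value)   # logistic targeting: 24033.4
campaign_profit(nr_customers_svm, pos_pred_value_svm)  # SVM targeting: 6393.234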