#ABSTRACT/INTRODUCTION

# This data set comes from a direct-marketing case in the insurance sector, where the goal was to predict
# policy ownership: specifically, which customers would be interested in buying a caravan insurance policy.
# The data set was used in the second edition of the Computational Intelligence and Learning (CoIL)
# competition challenge in the year 2000, organised by the CoIL cluster, a cooperation between four EU-funded
# Networks of Excellence representing the areas of neural networks (NeuroNet), fuzzy systems (ERUDIT),
# evolutionary computing (EvoNet) and machine learning (MLNet). It is owned and was donated by Peter van der Putten
# of the Dutch data mining company Sentient Machine Research, Baarsjesweg 224, 1058 AA Amsterdam,
# and is based on a real-world business problem.

#Relevant Papers
# P. van der Putten and M. van Someren (eds). CoIL Challenge 2000: The Insurance Company Case.
# Published by Sentient Machine Research, Amsterdam. Also a Leiden Institute of Advanced Computer Science Technical Report 2000-09.
# June 22nd, 2000.

pacman::p_load(pacman,dplyr,GGally,ggvis,ggthemes,ggplot2,
               rio,lubridate,shiny,tidyr,plotly,psych,rmarkdown,httr,tidyverse,stringr)

library(ISLR)
library(tidyverse)
library(ggplot2)
# Copy the Caravan data set shipped with the ISLR package into the workspace.
# The package is named explicitly so the copy does not depend on whichever
# object called `Caravan` happens to be found first on the search path.
Caravan <- ISLR::Caravan
# Preview the first six customer records.
head(Caravan)
##   MOSTYPE MAANTHUI MGEMOMV MGEMLEEF MOSHOOFD MGODRK MGODPR MGODOV MGODGE MRELGE
## 1      33        1       3        2        8      0      5      1      3      7
## 2      37        1       2        2        8      1      4      1      4      6
## 3      37        1       2        2        8      0      4      2      4      3
## 4       9        1       3        3        3      2      3      2      4      5
## 5      40        1       4        2       10      1      4      1      4      7
## 6      23        1       2        1        5      0      5      0      5      0
##   MRELSA MRELOV MFALLEEN MFGEKIND MFWEKIND MOPLHOOG MOPLMIDD MOPLLAAG MBERHOOG
## 1      0      2        1        2        6        1        2        7        1
## 2      2      2        0        4        5        0        5        4        0
## 3      2      4        4        4        2        0        5        4        0
## 4      2      2        2        3        4        3        4        2        4
## 5      1      2        2        4        4        5        4        0        0
## 6      6      3        3        5        2        0        5        4        2
##   MBERZELF MBERBOER MBERMIDD MBERARBG MBERARBO MSKA MSKB1 MSKB2 MSKC MSKD
## 1        0        1        2        5        2    1     1     2    6    1
## 2        0        0        5        0        4    0     2     3    5    0
## 3        0        0        7        0        2    0     5     0    4    0
## 4        0        0        3        1        2    3     2     1    4    0
## 5        5        4        0        0        0    9     0     0    0    0
## 6        0        0        4        2        2    2     2     2    4    2
##   MHHUUR MHKOOP MAUT1 MAUT2 MAUT0 MZFONDS MZPART MINKM30 MINK3045 MINK4575
## 1      1      8     8     0     1       8      1       0        4        5
## 2      2      7     7     1     2       6      3       2        0        5
## 3      7      2     7     0     2       9      0       4        5        0
## 4      5      4     9     0     0       7      2       1        5        3
## 5      4      5     6     2     1       5      4       0        0        9
## 6      9      0     5     3     3       9      0       5        2        3
##   MINK7512 MINK123M MINKGEM MKOOPKLA PWAPART PWABEDR PWALAND PPERSAUT PBESAUT
## 1        0        0       4        3       0       0       0        6       0
## 2        2        0       5        4       2       0       0        0       0
## 3        0        0       3        4       2       0       0        6       0
## 4        0        0       4        4       0       0       0        6       0
## 5        0        0       6        3       0       0       0        0       0
## 6        0        0       3        3       0       0       0        6       0
##   PMOTSCO PVRAAUT PAANHANG PTRACTOR PWERKT PBROM PLEVEN PPERSONG PGEZONG
## 1       0       0        0        0      0     0      0        0       0
## 2       0       0        0        0      0     0      0        0       0
## 3       0       0        0        0      0     0      0        0       0
## 4       0       0        0        0      0     0      0        0       0
## 5       0       0        0        0      0     0      0        0       0
## 6       0       0        0        0      0     0      0        0       0
##   PWAOREG PBRAND PZEILPL PPLEZIER PFIETS PINBOED PBYSTAND AWAPART AWABEDR
## 1       0      5       0        0      0       0        0       0       0
## 2       0      2       0        0      0       0        0       2       0
## 3       0      2       0        0      0       0        0       1       0
## 4       0      2       0        0      0       0        0       0       0
## 5       0      6       0        0      0       0        0       0       0
## 6       0      0       0        0      0       0        0       0       0
##   AWALAND APERSAUT ABESAUT AMOTSCO AVRAAUT AAANHANG ATRACTOR AWERKT ABROM
## 1       0        1       0       0       0        0        0      0     0
## 2       0        0       0       0       0        0        0      0     0
## 3       0        1       0       0       0        0        0      0     0
## 4       0        1       0       0       0        0        0      0     0
## 5       0        0       0       0       0        0        0      0     0
## 6       0        1       0       0       0        0        0      0     0
##   ALEVEN APERSONG AGEZONG AWAOREG ABRAND AZEILPL APLEZIER AFIETS AINBOED
## 1      0        0       0       0      1       0        0      0       0
## 2      0        0       0       0      1       0        0      0       0
## 3      0        0       0       0      1       0        0      0       0
## 4      0        0       0       0      1       0        0      0       0
## 5      0        0       0       0      1       0        0      0       0
## 6      0        0       0       0      0       0        0      0       0
##   ABYSTAND Purchase
## 1        0       No
## 2        0       No
## 3        0       No
## 4        0       No
## 5        0       No
## 6        0       No
#DATADICTIONARY

#Nr Name Description Domain
#1 MOSTYPE Customer Subtype see L0
#2 MAANTHUI Number of houses 1 - 10
#3 MGEMOMV Avg size household 1 - 6
#4 MGEMLEEF Avg age see L1
#5 MOSHOOFD Customer main type see L2
#6 MGODRK Roman catholic see L3
#7 MGODPR Protestant ...
#8 MGODOV Other religion
#9 MGODGE No religion
#10 MRELGE Married
#11 MRELSA Living together
#12 MRELOV Other relation
#13 MFALLEEN Singles
#14 MFGEKIND Household without children
#15 MFWEKIND Household with children
#16 MOPLHOOG High level education
#17 MOPLMIDD Medium level education
#18 MOPLLAAG Lower level education
#19 MBERHOOG High status
#20 MBERZELF Entrepreneur
#21 MBERBOER Farmer
#22 MBERMIDD Middle management
#23 MBERARBG Skilled labourers
#24 MBERARBO Unskilled labourers
#25 MSKA Social class A
#26 MSKB1 Social class B1
#27 MSKB2 Social class B2
#28 MSKC Social class C
#29 MSKD Social class D
#30 MHHUUR Rented house
#31 MHKOOP Home owners
#32 MAUT1 1 car
#33 MAUT2 2 cars
#34 MAUT0 No car
#35 MZFONDS National Health Service
#36 MZPART Private health insurance
#37 MINKM30 Income < 30.000
#38 MINK3045 Income 30-45.000
#39 MINK4575 Income 45-75.000
#40 MINK7512 Income 75-122.000
#41 MINK123M Income >123.000
#42 MINKGEM Average income
#43 MKOOPKLA Purchasing power class
#44 PWAPART Contribution private third party insurance see L4
#45 PWABEDR Contribution third party insurance (firms) ...
#46 PWALAND Contribution third party insurance (agriculture)
#47 PPERSAUT Contribution car policies
#48 PBESAUT Contribution delivery van policies
#49 PMOTSCO Contribution motorcycle/scooter policies
#50 PVRAAUT Contribution lorry policies
#51 PAANHANG Contribution trailer policies
#52 PTRACTOR Contribution tractor policies
#53 PWERKT Contribution agricultural machines policies 
#54 PBROM Contribution moped policies
#55 PLEVEN Contribution life insurances
#56 PPERSONG Contribution private accident insurance policies
#57 PGEZONG Contribution family accidents insurance policies
#58 PWAOREG Contribution disability insurance policies
#59 PBRAND Contribution fire policies
#60 PZEILPL Contribution surfboard policies
#61 PPLEZIER Contribution boat policies
#62 PFIETS Contribution bicycle policies
#63 PINBOED Contribution property insurance policies
#64 PBYSTAND Contribution social security insurance policies
#65 AWAPART Number of private third party insurance 1 - 12
#66 AWABEDR Number of third party insurance (firms) ...
#67 AWALAND Number of third party insurance (agriculture)
#68 APERSAUT Number of car policies
#69 ABESAUT Number of delivery van policies
#70 AMOTSCO Number of motorcycle/scooter policies
#71 AVRAAUT Number of lorry policies *
#72 AAANHANG Number of trailer policies *
#73 ATRACTOR Number of tractor policies
#74 AWERKT Number of agricultural machines policies
#75 ABROM Number of moped policies
#76 ALEVEN Number of life insurances
#77 APERSONG Number of private accident insurance policies
#78 AGEZONG Number of family accidents insurance policies*
#79 AWAOREG Number of disability insurance policies
#80 ABRAND Number of fire policies *
#81 AZEILPL Number of surfboard policies
#82 APLEZIER Number of boat policies *
#83 AFIETS Number of bicycle policies  *
#84 AINBOED Number of property insurance policies *
#85 ABYSTAND Number of social security insurance policies *
#86 CARAVAN Number of mobile home policies 0 - 1   *

# L0:
#    Value Label
# 1 High Income, expensive child
# 2 Very Important Provincials
# 3 High status seniors
# 4 Affluent senior apartments
# 5 Mixed seniors
# 6 Career and childcare
# 7 Dinki's (double income no kids)
# 8 Middle class families
# 9 Modern, complete families
# 10 Stable family
# 11 Family starters
# 12 Affluent young families
# 13 Young all american family
# 14 Junior cosmopolitan
# 15 Senior cosmopolitans
# 16 Students in apartments
# 17 Fresh masters in the city
# 18 Single youth
# 19 Suburban youth
# 20 Ethnically diverse
# 21 Young urban have-nots
# 22 Mixed apartment dwellers
# 23 Young and rising
# 24 Young, low educated 
# 25 Young seniors in the city
# 26 Own home elderly
# 27 Seniors in apartments
# 28 Residential elderly
# 29 Porchless seniors: no front yard
# 30 Religious elderly singles
# 31 Low income catholics
# 32 Mixed seniors
# 33 Lower class large families
# 34 Large family, employed child
# 35 Village families
# 36 Couples with teens 'Married with children'
# 37 Mixed small town dwellers
# 38 Traditional families
# 39 Large religious families
# 40 Large family farms
# 41 Mixed rurals

# L1:

#1 20-30 years

#2 30-40 years

#3 40-50 years

#4 50-60 years

#5 60-70 years

#6 70-80 years



# L2:

#1 Successful hedonists

#2 Driven Growers

#3 Average Family

#4 Career Loners

#5 Living well

#6 Cruising Seniors

#7 Retired and Religious

#8 Family with grown ups

#9 Conservative families

#10 Farmers



# L3:

#0 0%

#1 1 - 10%

#2 11 - 23%

#3 24 - 36%

#4 37 - 49%

#5 50 - 62%

#6 63 - 75%

#7 76 - 88%

#8 89 - 99%

#9 100%



# L4:

#0 f 0

#1 f 1 - 49

#2 f 50 - 99

#3 f 100 - 199

#4 f 200 - 499

#5 f 500 - 999

#6 f 1000 - 4999

#7 f 5000 - 9999

#8 f 10.000 - 19.999

#9 f 20.000 - ?


#The Insurance Company (TIC) Benchmark

#Description
#The data contains 5822 real customer records. Each record consists of 86 variables, 
#containing sociodemographic data (variables 1-43) and product ownership (variables 44-86).
#The sociodemographic data is derived from zip codes.
#All customers living in areas with the same zip code have the same sociodemographic attributes.
#Variable 86 (Purchase) indicates whether the customer purchased a caravan insurance policy.
#Further information on the individual variables can be obtained at http://www.liacs.nl/~putten/library/cc2000/data.html


#PROBLEM STATEMENT OR TASK

#Predict which customers are potentially interested in a caravan insurance policy
#(predict or Classify which customers are potentially likely to buy caravan insurance policy)

#task
#to predict whether a customer is interested in a caravan insurance policy from other data about the customer.



# Per-column summary: min, quartiles, median, mean and max for the numeric
# columns, and Yes/No counts for Purchase.
summary(object = Caravan)
##     MOSTYPE         MAANTHUI         MGEMOMV         MGEMLEEF    
##  Min.   : 1.00   Min.   : 1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:10.00   1st Qu.: 1.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :30.00   Median : 1.000   Median :3.000   Median :3.000  
##  Mean   :24.25   Mean   : 1.111   Mean   :2.679   Mean   :2.991  
##  3rd Qu.:35.00   3rd Qu.: 1.000   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :41.00   Max.   :10.000   Max.   :5.000   Max.   :6.000  
##     MOSHOOFD          MGODRK           MGODPR          MGODOV    
##  Min.   : 1.000   Min.   :0.0000   Min.   :0.000   Min.   :0.00  
##  1st Qu.: 3.000   1st Qu.:0.0000   1st Qu.:4.000   1st Qu.:0.00  
##  Median : 7.000   Median :0.0000   Median :5.000   Median :1.00  
##  Mean   : 5.774   Mean   :0.6965   Mean   :4.627   Mean   :1.07  
##  3rd Qu.: 8.000   3rd Qu.:1.0000   3rd Qu.:6.000   3rd Qu.:2.00  
##  Max.   :10.000   Max.   :9.0000   Max.   :9.000   Max.   :5.00  
##      MGODGE          MRELGE          MRELSA           MRELOV    
##  Min.   :0.000   Min.   :0.000   Min.   :0.0000   Min.   :0.00  
##  1st Qu.:2.000   1st Qu.:5.000   1st Qu.:0.0000   1st Qu.:1.00  
##  Median :3.000   Median :6.000   Median :1.0000   Median :2.00  
##  Mean   :3.259   Mean   :6.183   Mean   :0.8835   Mean   :2.29  
##  3rd Qu.:4.000   3rd Qu.:7.000   3rd Qu.:1.0000   3rd Qu.:3.00  
##  Max.   :9.000   Max.   :9.000   Max.   :7.0000   Max.   :9.00  
##     MFALLEEN        MFGEKIND       MFWEKIND      MOPLHOOG        MOPLMIDD    
##  Min.   :0.000   Min.   :0.00   Min.   :0.0   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:2.00   1st Qu.:3.0   1st Qu.:0.000   1st Qu.:2.000  
##  Median :2.000   Median :3.00   Median :4.0   Median :1.000   Median :3.000  
##  Mean   :1.888   Mean   :3.23   Mean   :4.3   Mean   :1.461   Mean   :3.351  
##  3rd Qu.:3.000   3rd Qu.:4.00   3rd Qu.:6.0   3rd Qu.:2.000   3rd Qu.:4.000  
##  Max.   :9.000   Max.   :9.00   Max.   :9.0   Max.   :9.000   Max.   :9.000  
##     MOPLLAAG        MBERHOOG        MBERZELF        MBERBOER     
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:3.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :5.000   Median :2.000   Median :0.000   Median :0.0000  
##  Mean   :4.572   Mean   :1.895   Mean   :0.398   Mean   :0.5223  
##  3rd Qu.:6.000   3rd Qu.:3.000   3rd Qu.:1.000   3rd Qu.:1.0000  
##  Max.   :9.000   Max.   :9.000   Max.   :5.000   Max.   :9.0000  
##     MBERMIDD        MBERARBG       MBERARBO          MSKA           MSKB1      
##  Min.   :0.000   Min.   :0.00   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:2.000   1st Qu.:1.00   1st Qu.:1.000   1st Qu.:0.000   1st Qu.:1.000  
##  Median :3.000   Median :2.00   Median :2.000   Median :1.000   Median :2.000  
##  Mean   :2.899   Mean   :2.22   Mean   :2.306   Mean   :1.621   Mean   :1.607  
##  3rd Qu.:4.000   3rd Qu.:3.00   3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :9.000   Max.   :9.00   Max.   :9.000   Max.   :9.000   Max.   :9.000  
##      MSKB2            MSKC            MSKD           MHHUUR     
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:2.000   1st Qu.:0.000   1st Qu.:2.000  
##  Median :2.000   Median :4.000   Median :1.000   Median :4.000  
##  Mean   :2.203   Mean   :3.759   Mean   :1.067   Mean   :4.237  
##  3rd Qu.:3.000   3rd Qu.:5.000   3rd Qu.:2.000   3rd Qu.:7.000  
##  Max.   :9.000   Max.   :9.000   Max.   :9.000   Max.   :9.000  
##      MHKOOP          MAUT1          MAUT2           MAUT0          MZFONDS     
##  Min.   :0.000   Min.   :0.00   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:2.000   1st Qu.:5.00   1st Qu.:0.000   1st Qu.:1.000   1st Qu.:5.000  
##  Median :5.000   Median :6.00   Median :1.000   Median :2.000   Median :7.000  
##  Mean   :4.772   Mean   :6.04   Mean   :1.316   Mean   :1.959   Mean   :6.277  
##  3rd Qu.:7.000   3rd Qu.:7.00   3rd Qu.:2.000   3rd Qu.:3.000   3rd Qu.:8.000  
##  Max.   :9.000   Max.   :9.00   Max.   :7.000   Max.   :9.000   Max.   :9.000  
##      MZPART         MINKM30         MINK3045        MINK4575    
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:2.000   1st Qu.:1.000  
##  Median :2.000   Median :2.000   Median :4.000   Median :3.000  
##  Mean   :2.729   Mean   :2.574   Mean   :3.536   Mean   :2.731  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:4.000  
##  Max.   :9.000   Max.   :9.000   Max.   :9.000   Max.   :9.000  
##     MINK7512         MINK123M         MINKGEM         MKOOPKLA    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:3.000  
##  Median :0.0000   Median :0.0000   Median :4.000   Median :4.000  
##  Mean   :0.7961   Mean   :0.2027   Mean   :3.784   Mean   :4.236  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:4.000   3rd Qu.:6.000  
##  Max.   :9.0000   Max.   :9.0000   Max.   :9.000   Max.   :8.000  
##     PWAPART          PWABEDR           PWALAND           PPERSAUT   
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00  
##  Median :0.0000   Median :0.00000   Median :0.00000   Median :5.00  
##  Mean   :0.7712   Mean   :0.04002   Mean   :0.07162   Mean   :2.97  
##  3rd Qu.:2.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:6.00  
##  Max.   :3.0000   Max.   :6.00000   Max.   :4.00000   Max.   :8.00  
##     PBESAUT           PMOTSCO          PVRAAUT            PAANHANG      
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.0000   Median :0.000000   Median :0.00000  
##  Mean   :0.04827   Mean   :0.1754   Mean   :0.009447   Mean   :0.02096  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :7.00000   Max.   :7.0000   Max.   :9.000000   Max.   :5.00000  
##     PTRACTOR           PWERKT            PBROM           PLEVEN      
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.00000   Median :0.000   Median :0.0000  
##  Mean   :0.09258   Mean   :0.01305   Mean   :0.215   Mean   :0.1948  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.000   3rd Qu.:0.0000  
##  Max.   :6.00000   Max.   :6.00000   Max.   :6.000   Max.   :9.0000  
##     PPERSONG          PGEZONG           PWAOREG            PBRAND     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :2.000  
##  Mean   :0.01374   Mean   :0.01529   Mean   :0.02353   Mean   :1.828  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:4.000  
##  Max.   :6.00000   Max.   :3.00000   Max.   :7.00000   Max.   :8.000  
##     PZEILPL             PPLEZIER           PFIETS           PINBOED       
##  Min.   :0.0000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.0000000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.0000000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.0008588   Mean   :0.01889   Mean   :0.02525   Mean   :0.01563  
##  3rd Qu.:0.0000000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :3.0000000   Max.   :6.00000   Max.   :1.00000   Max.   :6.00000  
##     PBYSTAND          AWAPART         AWABEDR           AWALAND       
##  Min.   :0.00000   Min.   :0.000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.000   Median :0.00000   Median :0.00000  
##  Mean   :0.04758   Mean   :0.403   Mean   :0.01477   Mean   :0.02061  
##  3rd Qu.:0.00000   3rd Qu.:1.000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :5.00000   Max.   :2.000   Max.   :5.00000   Max.   :1.00000  
##     APERSAUT         ABESAUT           AMOTSCO           AVRAAUT        
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :1.0000   Median :0.00000   Median :0.00000   Median :0.000000  
##  Mean   :0.5622   Mean   :0.01048   Mean   :0.04105   Mean   :0.002233  
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.000000  
##  Max.   :7.0000   Max.   :4.00000   Max.   :8.00000   Max.   :3.000000  
##     AAANHANG          ATRACTOR           AWERKT             ABROM        
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.000000   Median :0.00000  
##  Mean   :0.01254   Mean   :0.03367   Mean   :0.006183   Mean   :0.07042  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :3.00000   Max.   :4.00000   Max.   :6.000000   Max.   :2.00000  
##      ALEVEN           APERSONG           AGEZONG            AWAOREG        
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.000000  
##  Median :0.00000   Median :0.000000   Median :0.000000   Median :0.000000  
##  Mean   :0.07661   Mean   :0.005325   Mean   :0.006527   Mean   :0.004638  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.000000  
##  Max.   :8.00000   Max.   :1.000000   Max.   :1.000000   Max.   :2.000000  
##      ABRAND          AZEILPL             APLEZIER            AFIETS       
##  Min.   :0.0000   Min.   :0.0000000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.0000000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :1.0000   Median :0.0000000   Median :0.000000   Median :0.00000  
##  Mean   :0.5701   Mean   :0.0005153   Mean   :0.006012   Mean   :0.03178  
##  3rd Qu.:1.0000   3rd Qu.:0.0000000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :7.0000   Max.   :1.0000000   Max.   :2.000000   Max.   :3.00000  
##     AINBOED            ABYSTAND       Purchase  
##  Min.   :0.000000   Min.   :0.00000   No :5474  
##  1st Qu.:0.000000   1st Qu.:0.00000   Yes: 348  
##  Median :0.000000   Median :0.00000             
##  Mean   :0.007901   Mean   :0.01426             
##  3rd Qu.:0.000000   3rd Qu.:0.00000             
##  Max.   :2.000000   Max.   :2.00000
# Tabulate caravan-policy purchases (Yes/No) over all customers.
x <- table(Caravan$Purchase)
x # print the counts
## 
##   No  Yes 
## 5474  348
# yes= 348 no= 5474
# Percentage of customers who purchased a caravan policy. It is derived from
# the table itself rather than from hard-coded counts, so it stays correct if
# the data changes.
x[["Yes"]] / sum(x) * 100
## [1] 5.977327
# i.e. a proportion of 0.05977, or 5.977%: out of 5822 customers only 348
# bought a caravan insurance policy.


# Two bar colours: the first is used for "No", the second for "Yes".
colors <- c("blue", "red")
col <- colors # alias kept from the original script; not used by the code visible here
# Bar plot of customers of the caravan policy.
barplot(x, main = "Customers of Caravan Policy", col = colors, xlab = "Caravan Policy")
box() # draw a frame around the plot region

# NOW WE WANT TO PLOT PURCHASE OF CARAVAN POLICY BY CUSTOMERS AGAINST PRODUCT USAGE (POLICY OWNERSHIP) DATA VARIABLES

#VARIABLE 1-NUMBER OF BOAT POLICIES
# Number of boat policies (APLEZIER) held by customers who bought the caravan
# policy.
x <- with(Caravan, table(APLEZIER[Purchase == "Yes"]))
x
## 
##   0   1   2 
## 335  12   1
# One bar per observed number of boat policies. The palette is sized from the
# table so it always matches the number of bars.
barplot(
  x,
  col = rainbow(length(x)),
  main = "PURCHASE OF CARAVAN POLICY vs NUMBER OF BOAT POLICIES",
  xlab = "Number of Boat Policies",
  ylab = "Number of Customers"
)

# Most caravan-policy purchasers hold no boat policy: 335 of the 348 purchasers own none,
# while only 13 own one or more boat policies.
# NOTE(review): this chart counts purchasers only. To conclude which group is "more likely" to buy,
# compare purchase rates within each group, e.g. prop.table(table(Caravan$APLEZIER, Caravan$Purchase), 1).


# 2 NUMBER OF SOCIAL SECURITY INSURANCE POLICIES
# Number of social security insurance policies (ABYSTAND) held by customers
# who bought the caravan policy.
x <- with(Caravan, table(ABYSTAND[Purchase == "Yes"]))
x
## 
##   0   1 
## 332  16
# One bar per observed policy count. The palette is sized from the table so
# it always matches the number of bars.
barplot(
  x,
  col = rainbow(length(x)),
  main = "PURCHASE OF CARAVAN POLICY vs NUMBER OF SOCIAL SECURITY INSURANCE POLICIES",
  xlab = "Number of Social Security Ins policies",
  ylab = "Number of Customers"
)

# We learn that most caravan-policy purchasers have no Social Security policy:
# 332 of the 348 purchasers hold none, while 16 hold one.
# NOTE(review): these are counts among purchasers only; compare purchase rates per group before
# concluding that customers without a Social Security policy are more likely to buy.

# 3 Number of Property Insurance  policies
# Number of property insurance policies (AINBOED), counted over the
# caravan-policy purchasers only.
x <- table(subset(Caravan, Purchase == "Yes")[["AINBOED"]])
print(x)
## 
##   0   1 
## 343   5
# Bar chart of those counts, one colour per bar.
barplot(
  x,
  main = "PURCHASE OF CARAVAN POLICY vs NUMBER OF PROPERTY INS POLICY",
  xlab = "Number of Property Ins Policies",
  ylab = "Number of Customers",
  col = rainbow(2)
)

# 343 of the 348 caravan-policy purchasers have no Property insurance policy,
# while 5 purchasers have one.
# NOTE(review): counts among purchasers only; compare purchase rates per group before concluding who is more likely to buy.

# 4 Contribution to car policies
# Car-policy contribution level (PPERSAUT) of each caravan-policy purchaser,
# tabulated by level.
x <- table(Caravan[Caravan$Purchase == "Yes", "PPERSAUT"])
print(x)
## 
##   0   5   6 
##  72  14 262
# Bar chart of purchasers per contribution level.
barplot(
  x,
  main = "PURCHASE OF CARAVAN POLICY vs CONTRIBUTION TO CAR POLICIES",
  xlab = "Contributions to Car Policies",
  ylab = "Number of Customers",
  col = rainbow(3)
)

# We learn that customers whose car-policy contribution is at level 6 (1000 to 4999 per legend L4)
# form the largest group of caravan-policy purchasers: 262 of the 348.



# 5 Number of fire policies
# Number of fire policies (ABRAND) held by caravan-policy purchasers.
x <- with(Caravan, table(ABRAND[Purchase == "Yes"]))
print(x)
## 
##   0   1   2 
## 109 232   7
# Bar chart of purchasers per number of fire policies.
barplot(
  x,
  main = "PURCHASE OF CARAVAN POLICY vs NUMBER OF FIRE POLICY",
  ylab = "Number of Customers",
  xlab = "Number of fire policies",
  col = rainbow(3)
)

# Customers with exactly one Fire policy make up the largest group of Caravan insurance policy purchasers:
# 232 of the 348.

# 6 Number of Life policies
# Number of life insurance policies (ALEVEN) held by caravan-policy
# purchasers.
x <- table(subset(Caravan, Purchase == "Yes")[["ALEVEN"]])
print(x)
## 
##   0   1   2   3   4 
## 325   8  10   2   3
# Bar chart of purchasers per number of life policies, framed with a box.
barplot(
  x,
  main = "PURCHASE OF CARAVAN POLICY vs NUMBER OF LIFE POLICY",
  ylab = "Number of Customers",
  xlab = "Number of Life policies",
  col = rainbow(5)
)
box()

#It is clear that most purchasers of our Caravan policy have no life policies (325 of 348), followed by those with 1 or 2 life policies

##CHARTS SHOWING PURCHASE OF CARAVAN POLICY BY CUSTOMERS AGAINST SOCIODEMOGRAPHIC DATA VARIABLES##

#1 CUSTOMER SUBTYPE
# Customer subtype (MOSTYPE, legend L0) of each caravan-policy purchaser,
# tabulated by subtype code. Subtypes with no purchasers do not appear.
x <- with(Caravan, table(MOSTYPE[Purchase == "Yes"]))
x
## 
##  1  2  3  4  5  6  7  8  9 10 11 12 13 20 22 23 24 25 26 27 29 30 31 32 33 34 
## 13  6 25  2  2 12  3 51 12  9  9 16 13  2  4  4  5  2  1  1  2  4  6  8 46  9 
## 35 36 37 38 39 41 
##  8 16 10 23 19  5
# Bar chart with a title and axis labels, like the other charts in this
# script. The palette is sized from the table so the colours span exactly the
# bars that are drawn.
barplot(
  x,
  col = rainbow(length(x)),
  main = "PURCHASE OF CARAVAN POLICY BY CUSTOMER SUBTYPE",
  xlab = "Customer Subtype",
  ylab = "Number of Customers"
)

# We learn that Customer subtype 8 (Middle class families) and Customer subtype 33 (Lower class large families)
# have the most purchasers of our Caravan insurance policy, followed by High status seniors (3) and Traditional families (38).

#2 AVERAGE AGE
# Average-age code (MGEMLEEF, legend L1) of each caravan-policy purchaser,
# tabulated by code.
x <- table(subset(Caravan, Purchase == "Yes")[["MGEMLEEF"]])
print(x)
## 
##   1   2   3   4   5   6 
##   1  87 183  64  12   1
# Replace the codes 1-6 with their age ranges so the bars are labelled
# readably.
names(x) <- c(
  "20 to 30", "30 to 40", "40 to 50",
  "50 to 60", "60 to 70", "70 to 80"
)
barplot(
  x,
  main = "PURCHASE OF CARAVAN POLICY BY AVERAGE AGE",
  ylab = "Number of Customers",
  xlab = "AGE GROUP",
  col = rainbow(6)
)

#It becomes clear that customers belonging to age group 40-50 are more likely to purchase our Caravan insurance policy
#followed by age group 30-40.


#3 Purchasing power class
# Purchasing power class (MKOOPKLA) of each caravan-policy purchaser,
# tabulated by class.
x <- with(Caravan, table(MKOOPKLA[Purchase == "Yes"]))
print(x)
## 
##  1  2  3  4  5  6  7  8 
## 18 15 71 46 30 66 67 35
# Bar chart of purchasers per purchasing power class.
barplot(
  x,
  main = "PURCHASE OF CARAVAN POLICY BY PURCHASING POWER CLASS",
  ylab = "Number of Customers",
  xlab = "PURCHASING POWER CLASS",
  col = rainbow(8)
)

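# Purchasing power class 3 has the most Caravan policy purchasers (71), followed by class 7 (67) and class 6 (66).
# Note: these codes are purchasing power classes, not L0 customer subtypes, so the L0 labels
# (3 = High status seniors, 7 = Dinki's double income no kids, 6 = Career and childcare) do not apply here.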


#4 AVERAGE INCOME
# Average-income code (MINKGEM) of each caravan-policy purchaser, tabulated
# by code.
x <- with(Caravan, table(MINKGEM[Purchase == "Yes"]))
print(x)
## 
##   1   2   3   4   5   6   7   8 
##   1  20  69 139  70  24  17   8
# Replace the codes 1-8 with range labels for the axis.
# NOTE(review): these ranges are the brackets of legend L4, which the data
# dictionary attaches to the contribution variables (44 onwards); confirm
# they also apply to MINKGEM (variable 42).
names(x) <- c(
  "$1 to $49", "$50 to $99", "$100 to $199", "$200 to $499",
  "$500 to $999", "$1000 to $4999", "$5000 to $9999", "$10.000 to $19.999"
)
barplot(
  x,
  main = "PURCHASE OF CARAVAN POLICY BY AVERAGE INCOME",
  xlab = "Income groups",
  ylab = "Number of Customers",
  col = rainbow(8)
)
box()

# The bar chart suggests that middle-income customers, with an average income between $200 and $499 (4), are the largest group of purchasers,
# followed by those between $100 and $199 (3) and between $500 and $999 (5).


#Mini Conclusion based on graphs.#

#There are 5822 Customers and only 348 of them purchased the Caravan Insurance Policy.

#That is around 6% of the total Customers if we round off.
#Within this 6%, most Customers own no boat policies,
#no Social Security policies,
#no Property Insurance policies,
#and pay a car policy contribution at level 6 (1000 to 4999).
#Most have only one Fire policy,
#and the most common Customer Subtypes are Middle class families and Lower class large families,
#followed by High status seniors and Traditional families.
#The most common age group is 40-50, followed by 30-40.
#By purchasing power class, class 3 has the most purchasers, followed by classes 7 and 6
# (these are purchasing power classes, not the L0 customer subtypes High status seniors,
# Dinki's double income no kids, and Career and childcare).

#These are the characteristics of the customers most interested in
# our Caravan Insurance Policy.



#Modeling
library(rpart)
library(rattle)
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.4.0 Copyright (c) 2006-2020 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(rpart.plot)
library(RColorBrewer)
library(crossval)
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(vcd)
## Loading required package: grid
## 
## Attaching package: 'vcd'
## The following object is masked from 'package:ISLR':
## 
##     Hitters
library(Metrics)

# Working copies of the data. The intermediate aliases are kept so that any
# code referring to them still finds them.
D1 <- Caravan
D1.NEW <- D1
d1.ori <- D1.NEW
set.seed(99) # fixed seed so the 50/50 split is reproducible

# Draw half of the rows (by position) for training; every remaining row goes
# to the test set. sample.int() on the row count selects the same rows as
# sampling the row names did, without string matching to build the test set.
train_idx <- sample.int(nrow(d1.ori), size = round(nrow(d1.ori) * 0.5))
tr <- d1.ori[train_idx, ]
te <- d1.ori[-train_idx, ]

# Working copies of the training and test data
tr1 <- tr
te1 <- te
te2 <- te

# Zero-R strategy: predict that no one will purchase. The prediction is stored
# as a factor with the same levels as the real outcome (instead of numeric 0)
# so it can be compared directly with te$Purchase.
te2$Purchase <- factor(rep("No", nrow(te2)), levels = levels(te$Purchase))

# Fit a classification tree for Purchase using every other column as a
# candidate predictor. The response is coerced to a factor first.
tr1[["Purchase"]] <- as.factor(tr1[["Purchase"]])
fit1 <- rpart(
  formula = Purchase ~ .,
  data = tr1,
  control = rpart.control(cp = 0.0008, minsplit = 600, minbucket = 1)
)
# Print the fitted tree (nodes, splits, class counts and probabilities).
print(fit1)
## n= 2911 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 2911 175 No (0.93988320 0.06011680)  
##    2) PPERSAUT< 5.5 1707  47 No (0.97246632 0.02753368) *
##    3) PPERSAUT>=5.5 1204 128 No (0.89368771 0.10631229)  
##      6) MOPLLAAG>=4.5 630  35 No (0.94444444 0.05555556)  
##       12) PPLEZIER< 0.5 623  32 No (0.94863563 0.05136437)  
##         24) MSKD< 6.5 622  31 No (0.95016077 0.04983923)  
##           48) ALEVEN< 3.5 621  30 No (0.95169082 0.04830918) *
##           49) ALEVEN>=3.5 1   0 Yes (0.00000000 1.00000000) *
##         25) MSKD>=6.5 1   0 Yes (0.00000000 1.00000000) *
##       13) PPLEZIER>=0.5 7   3 No (0.57142857 0.42857143) *
##      7) MOPLLAAG< 4.5 574  93 No (0.83797909 0.16202091) *
# Run a garbage collection and print the memory-usage report.
print(gc())
##           used (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 1285654 68.7    2313118 123.6  2313118 123.6
## Vcells 3168678 24.2    8388608  64.0  7586520  57.9
# Decorated plot of the fitted tree (rattle).
fancyRpartPlot(model = fit1)

# Variables appearing in the tree's splits:
#   PPERSAUT - Contribution car policies
#   MOPLLAAG - Lower level education
#   PPLEZIER - Contribution boat policies
#   MSKD     - Social class D
#   ALEVEN   - Number of life insurances

# Plain rpart plot of the same tree, with split labels added.
plot(fit1)
text(fit1)

# Complexity parameter of the cptable row with the smallest cross-validated
# error (xerror). local() keeps the helper variable out of the workspace.
local({
  cp_table <- fit1$cptable
  cp_table[which.min(cp_table[, "xerror"]), "CP"]
})
## [1] 0.002285714
# Predict the class ("No"/"Yes") of every test-set customer with the tree.
Prediction <- predict(fit1, te1, type = "class")

# compare with base model

# Store the tree's predictions alongside the test data.
te2$Purchase <- Prediction
# Relabel predicted and actual classes with readable names. Both factors use
# the same level order so they can be cross-tabulated.
Pred <- factor(te2$Purchase, levels = c("No", "Yes"),
               labels = c("Not Purchased", "Purchased"))
Actual <- factor(te1$Purchase, levels = c("No", "Yes"),
                 labels = c("Not Purchased", "Purchased"))
# Class counts in the test set.
table(te1$Purchase)
## 
##   No  Yes 
## 2738  173
# Confusion counts (FP, TP, TN, FN) with "Not Purchased" as the negative
# class. Functions are namespace-qualified so that a same-named function from
# another attached package cannot be picked up by mistake.
cm1 <- crossval::confusionMatrix(Actual, Pred, negative = "Not Purchased")
cm1
##   FP   TP   TN   FN 
##    1    1 2737  172 
## attr(,"negative")
## [1] "Not Purchased"
# Corresponding accuracy, sensitivity etc.
crossval::diagnosticErrors(cm1)
##         acc        sens        spec         ppv         npv         lor 
## 0.940570251 0.005780347 0.999634770 0.500000000 0.940873152 2.767123232 
## attr(,"negative")
## [1] "Not Purchased"
# Classification error (share of test customers predicted incorrectly).
Metrics::ce(Actual, Pred)
## [1] 0.05942975
# Zero-R baseline for comparison: always predict "Not Purchased". Its accuracy
# is the share of non-purchasers in the test set. By the class counts above
# that is 2738 / 2911 (about 0.9406), the same as the tree's `acc`, so the tree
# does not improve on the baseline's accuracy.
baseline_acc <- mean(Actual == "Not Purchased")
baseline_acc