#ABSTRACT/INTRODUCTION
#This data comes from a direct marketing case in the insurance sector, where the goal was to predict
#policy ownership: specifically, who would be interested in buying a caravan insurance policy.
#The data set was used in the second edition of the Computational Intelligence and Learning (CoIL)
#competition challenge in the year 2000, organised by the CoIL cluster, a cooperation between four EU-funded
#Networks of Excellence representing the areas of neural networks (NeuroNet), fuzzy systems (ERUDIT),
#evolutionary computing (EvoNet) and machine learning (MLNet). It is owned and was donated by Peter van der Putten
# of the Dutch data mining company Sentient Machine Research, Baarsjesweg 224, 1058 AA Amsterdam,
#and is based on a real-world business problem.
#Relevant Papers
#P.van der Putten and M van Someren (eds). CoIL Challenge 2000: The insurance Company Case.
# Published by Sentient Machine Research, Amsterdam. Also a leiden Institute of Advanced Computer Science Technical report 2000-09.
#June 22nd 2000.
pacman::p_load(pacman,dplyr,GGally,ggvis,ggthemes,ggplot2,
rio,lubridate,shiny,tidyr,plotly,psych,rmarkdown,httr,tidyverse,stringr)
library(ISLR)
library(tidyverse)
library(ggplot2)
# Copy the Caravan data set (made available by library(ISLR)) into the workspace
# and preview its first rows.
Caravan <- Caravan
head(x = Caravan)
## MOSTYPE MAANTHUI MGEMOMV MGEMLEEF MOSHOOFD MGODRK MGODPR MGODOV MGODGE MRELGE
## 1 33 1 3 2 8 0 5 1 3 7
## 2 37 1 2 2 8 1 4 1 4 6
## 3 37 1 2 2 8 0 4 2 4 3
## 4 9 1 3 3 3 2 3 2 4 5
## 5 40 1 4 2 10 1 4 1 4 7
## 6 23 1 2 1 5 0 5 0 5 0
## MRELSA MRELOV MFALLEEN MFGEKIND MFWEKIND MOPLHOOG MOPLMIDD MOPLLAAG MBERHOOG
## 1 0 2 1 2 6 1 2 7 1
## 2 2 2 0 4 5 0 5 4 0
## 3 2 4 4 4 2 0 5 4 0
## 4 2 2 2 3 4 3 4 2 4
## 5 1 2 2 4 4 5 4 0 0
## 6 6 3 3 5 2 0 5 4 2
## MBERZELF MBERBOER MBERMIDD MBERARBG MBERARBO MSKA MSKB1 MSKB2 MSKC MSKD
## 1 0 1 2 5 2 1 1 2 6 1
## 2 0 0 5 0 4 0 2 3 5 0
## 3 0 0 7 0 2 0 5 0 4 0
## 4 0 0 3 1 2 3 2 1 4 0
## 5 5 4 0 0 0 9 0 0 0 0
## 6 0 0 4 2 2 2 2 2 4 2
## MHHUUR MHKOOP MAUT1 MAUT2 MAUT0 MZFONDS MZPART MINKM30 MINK3045 MINK4575
## 1 1 8 8 0 1 8 1 0 4 5
## 2 2 7 7 1 2 6 3 2 0 5
## 3 7 2 7 0 2 9 0 4 5 0
## 4 5 4 9 0 0 7 2 1 5 3
## 5 4 5 6 2 1 5 4 0 0 9
## 6 9 0 5 3 3 9 0 5 2 3
## MINK7512 MINK123M MINKGEM MKOOPKLA PWAPART PWABEDR PWALAND PPERSAUT PBESAUT
## 1 0 0 4 3 0 0 0 6 0
## 2 2 0 5 4 2 0 0 0 0
## 3 0 0 3 4 2 0 0 6 0
## 4 0 0 4 4 0 0 0 6 0
## 5 0 0 6 3 0 0 0 0 0
## 6 0 0 3 3 0 0 0 6 0
## PMOTSCO PVRAAUT PAANHANG PTRACTOR PWERKT PBROM PLEVEN PPERSONG PGEZONG
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## PWAOREG PBRAND PZEILPL PPLEZIER PFIETS PINBOED PBYSTAND AWAPART AWABEDR
## 1 0 5 0 0 0 0 0 0 0
## 2 0 2 0 0 0 0 0 2 0
## 3 0 2 0 0 0 0 0 1 0
## 4 0 2 0 0 0 0 0 0 0
## 5 0 6 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## AWALAND APERSAUT ABESAUT AMOTSCO AVRAAUT AAANHANG ATRACTOR AWERKT ABROM
## 1 0 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 1 0 0 0 0 0 0 0
## 4 0 1 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 1 0 0 0 0 0 0 0
## ALEVEN APERSONG AGEZONG AWAOREG ABRAND AZEILPL APLEZIER AFIETS AINBOED
## 1 0 0 0 0 1 0 0 0 0
## 2 0 0 0 0 1 0 0 0 0
## 3 0 0 0 0 1 0 0 0 0
## 4 0 0 0 0 1 0 0 0 0
## 5 0 0 0 0 1 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## ABYSTAND Purchase
## 1 0 No
## 2 0 No
## 3 0 No
## 4 0 No
## 5 0 No
## 6 0 No
#DATADICTIONARY
#Nr Name Description Domain
#1 MOSTYPE Customer Subtype see L0
#2 MAANTHUI Number of houses 1 - 10
#3 MGEMOMV Avg size household 1 - 6
#4 MGEMLEEF Avg age see L1
#5 MOSHOOFD Customer main type see L2
#6 MGODRK Roman catholic see L3
#7 MGODPR Protestant ...
#8 MGODOV Other religion
#9 MGODGE No religion
#10 MRELGE Married
#11 MRELSA Living together
#12 MRELOV Other relation
#13 MFALLEEN Singles
#14 MFGEKIND Household without children
#15 MFWEKIND Household with children
#16 MOPLHOOG High level education
#17 MOPLMIDD Medium level education
#18 MOPLLAAG Lower level education
#19 MBERHOOG High status
#20 MBERZELF Entrepreneur
#21 MBERBOER Farmer
#22 MBERMIDD Middle management
#23 MBERARBG Skilled labourers
#24 MBERARBO Unskilled labourers
#25 MSKA Social class A
#26 MSKB1 Social class B1
#27 MSKB2 Social class B2
#28 MSKC Social class C
#29 MSKD Social class D
#30 MHHUUR Rented house
#31 MHKOOP Home owners
#32 MAUT1 1 car
#33 MAUT2 2 cars
#34 MAUT0 No car
#35 MZFONDS National Health Service
#36 MZPART Private health insurance
#37 MINKM30 Income < 30.000
#38 MINK3045 Income 30-45.000
#39 MINK4575 Income 45-75.000
#40 MINK7512 Income 75-122.000
#41 MINK123M Income >123.000
#42 MINKGEM Average income
#43 MKOOPKLA Purchasing power class
#44 PWAPART Contribution private third party insurance see L4
#45 PWABEDR Contribution third party insurance (firms) ...
#46 PWALAND Contribution third party insurance (agriculture)
#47 PPERSAUT Contribution car policies
#48 PBESAUT Contribution delivery van policies
#49 PMOTSCO Contribution motorcycle/scooter policies
#50 PVRAAUT Contribution lorry policies
#51 PAANHANG Contribution trailer policies
#52 PTRACTOR Contribution tractor policies
#53 PWERKT Contribution agricultural machines policies
#54 PBROM Contribution moped policies
#55 PLEVEN Contribution life insurances
#56 PPERSONG Contribution private accident insurance policies
#57 PGEZONG Contribution family accidents insurance policies
#58 PWAOREG Contribution disability insurance policies
#59 PBRAND Contribution fire policies
#60 PZEILPL Contribution surfboard policies
#61 PPLEZIER Contribution boat policies
#62 PFIETS Contribution bicycle policies
#63 PINBOED Contribution property insurance policies
#64 PBYSTAND Contribution social security insurance policies
#65 AWAPART Number of private third party insurance 1 - 12
#66 AWABEDR Number of third party insurance (firms) ...
#67 AWALAND Number of third party insurance (agriculture)
#68 APERSAUT Number of car policies
#69 ABESAUT Number of delivery van policies
#70 AMOTSCO Number of motorcycle/scooter policies
#71 AVRAAUT Number of lorry policies *
#72 AAANHANG Number of trailer policies *
#73 ATRACTOR Number of tractor policies
#74 AWERKT Number of agricultural machines policies
#75 ABROM Number of moped policies
#76 ALEVEN Number of life insurances
#77 APERSONG Number of private accident insurance policies
#78 AGEZONG Number of family accidents insurance policies*
#79 AWAOREG Number of disability insurance policies
#80 ABRAND Number of fire policies *
#81 AZEILPL Number of surfboard policies
#82 APLEZIER Number of boat policies *
#83 AFIETS Number of bicycle policies *
#84 AINBOED Number of property insurance policies *
#85 ABYSTAND Number of social security insurance policies *
#86 CARAVAN Number of mobile home policies 0 - 1 *
# L0:
# Value Label
# 1 High Income, expensive child
# 2 Very Important Provincials
# 3 High status seniors
# 4 Affluent senior apartments
# 5 Mixed seniors
# 6 Career and childcare
# 7 Dinki's (double income no kids)
# 8 Middle class families
# 9 Modern, complete families
# 10 Stable family
# 11 Family starters
# 12 Affluent young families
# 13 Young all american family
# 14 Junior cosmopolitan
# 15 Senior cosmopolitans
# 16 Students in apartments
# 17 Fresh masters in the city
# 18 Single youth
# 19 Suburban youth
# 20 Etnically diverse
# 21 Young urban have-nots
# 22 Mixed apartment dwellers
# 23 Young and rising
# 24 Young, low educated
# 25 Young seniors in the city
# 26 Own home elderly
# 27 Seniors in apartments
# 28 Residential elderly
# 29 Porchless seniors: no front yard
# 30 Religious elderly singles
# 31 Low income catholics
# 32 Mixed seniors
# 33 Lower class large families
# 34 Large family, employed child
# 35 Village families
# 36 Couples with teens 'Married with children'
# 37 Mixed small town dwellers
# 38 Traditional families
# 39 Large religous families
# 40 Large family farms
# 41 Mixed rurals
# L1:
#1 20-30 years
#2 30-40 years
#3 40-50 years
#4 50-60 years
#5 60-70 years
#6 70-80 years
# L2:
#1 Successful hedonists
#2 Driven Growers
#3 Average Family
#4 Career Loners
#5 Living well
#6 Cruising Seniors
#7 Retired and Religeous
#8 Family with grown ups
#9 Conservative families
#10 Farmers
# L3:
#0 0%
#1 1 - 10%
#2 11 - 23%
#3 24 - 36%
#4 37 - 49%
#5 50 - 62%
#6 63 - 75%
#7 76 - 88%
#8 89 - 99%
#9 100%
# L4:
#0 f 0
#1 f 1 - 49
#2 f 50 - 99
#3 f 100 - 199
#4 f 200 - 499
#5 f 500 - 999
#6 f 1000 - 4999
#7 f 5000 - 9999
#8 f 10.000 - 19.999
#9 f 20.000 - ?
#The Insurance Company (TIC) Benchmark
#Description
#The data contains 5822 real customer records. Each record consists of 86 variables,
#containing sociodemographic data (variables 1-43) and product ownership (variables 44-86).
#The sociodemographic data is derived from zip codes.
#All customers living in areas with the same zip code have the same sociodemographic attributes.
#Variable 86 (Purchase) indicates whether the customer purchased a caravan insurance policy.
#Further information on the individual variables can be obtained at http://www.liacs.nl/~putten/library/cc2000/data.html
#PROBLEM STATEMENT OR TASK
#Predict which customers are potentially interested in a caravan insurance policy
#(predict or Classify which customers are potentially likely to buy caravan insurance policy)
#task
#to predict whether a customer is interested in a caravan insurance policy from other data about the customer.
# Per-column summary statistics for the whole data set (the output is pasted below).
summary(Caravan)
## MOSTYPE MAANTHUI MGEMOMV MGEMLEEF
## Min. : 1.00 Min. : 1.000 Min. :1.000 Min. :1.000
## 1st Qu.:10.00 1st Qu.: 1.000 1st Qu.:2.000 1st Qu.:2.000
## Median :30.00 Median : 1.000 Median :3.000 Median :3.000
## Mean :24.25 Mean : 1.111 Mean :2.679 Mean :2.991
## 3rd Qu.:35.00 3rd Qu.: 1.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :41.00 Max. :10.000 Max. :5.000 Max. :6.000
## MOSHOOFD MGODRK MGODPR MGODOV
## Min. : 1.000 Min. :0.0000 Min. :0.000 Min. :0.00
## 1st Qu.: 3.000 1st Qu.:0.0000 1st Qu.:4.000 1st Qu.:0.00
## Median : 7.000 Median :0.0000 Median :5.000 Median :1.00
## Mean : 5.774 Mean :0.6965 Mean :4.627 Mean :1.07
## 3rd Qu.: 8.000 3rd Qu.:1.0000 3rd Qu.:6.000 3rd Qu.:2.00
## Max. :10.000 Max. :9.0000 Max. :9.000 Max. :5.00
## MGODGE MRELGE MRELSA MRELOV
## Min. :0.000 Min. :0.000 Min. :0.0000 Min. :0.00
## 1st Qu.:2.000 1st Qu.:5.000 1st Qu.:0.0000 1st Qu.:1.00
## Median :3.000 Median :6.000 Median :1.0000 Median :2.00
## Mean :3.259 Mean :6.183 Mean :0.8835 Mean :2.29
## 3rd Qu.:4.000 3rd Qu.:7.000 3rd Qu.:1.0000 3rd Qu.:3.00
## Max. :9.000 Max. :9.000 Max. :7.0000 Max. :9.00
## MFALLEEN MFGEKIND MFWEKIND MOPLHOOG MOPLMIDD
## Min. :0.000 Min. :0.00 Min. :0.0 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:2.00 1st Qu.:3.0 1st Qu.:0.000 1st Qu.:2.000
## Median :2.000 Median :3.00 Median :4.0 Median :1.000 Median :3.000
## Mean :1.888 Mean :3.23 Mean :4.3 Mean :1.461 Mean :3.351
## 3rd Qu.:3.000 3rd Qu.:4.00 3rd Qu.:6.0 3rd Qu.:2.000 3rd Qu.:4.000
## Max. :9.000 Max. :9.00 Max. :9.0 Max. :9.000 Max. :9.000
## MOPLLAAG MBERHOOG MBERZELF MBERBOER
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:3.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000
## Median :5.000 Median :2.000 Median :0.000 Median :0.0000
## Mean :4.572 Mean :1.895 Mean :0.398 Mean :0.5223
## 3rd Qu.:6.000 3rd Qu.:3.000 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :9.000 Max. :9.000 Max. :5.000 Max. :9.0000
## MBERMIDD MBERARBG MBERARBO MSKA MSKB1
## Min. :0.000 Min. :0.00 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:2.000 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:1.000
## Median :3.000 Median :2.00 Median :2.000 Median :1.000 Median :2.000
## Mean :2.899 Mean :2.22 Mean :2.306 Mean :1.621 Mean :1.607
## 3rd Qu.:4.000 3rd Qu.:3.00 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :9.000 Max. :9.00 Max. :9.000 Max. :9.000 Max. :9.000
## MSKB2 MSKC MSKD MHHUUR
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:0.000 1st Qu.:2.000
## Median :2.000 Median :4.000 Median :1.000 Median :4.000
## Mean :2.203 Mean :3.759 Mean :1.067 Mean :4.237
## 3rd Qu.:3.000 3rd Qu.:5.000 3rd Qu.:2.000 3rd Qu.:7.000
## Max. :9.000 Max. :9.000 Max. :9.000 Max. :9.000
## MHKOOP MAUT1 MAUT2 MAUT0 MZFONDS
## Min. :0.000 Min. :0.00 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:2.000 1st Qu.:5.00 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:5.000
## Median :5.000 Median :6.00 Median :1.000 Median :2.000 Median :7.000
## Mean :4.772 Mean :6.04 Mean :1.316 Mean :1.959 Mean :6.277
## 3rd Qu.:7.000 3rd Qu.:7.00 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:8.000
## Max. :9.000 Max. :9.00 Max. :7.000 Max. :9.000 Max. :9.000
## MZPART MINKM30 MINK3045 MINK4575
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :4.000 Median :3.000
## Mean :2.729 Mean :2.574 Mean :3.536 Mean :2.731
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :9.000 Max. :9.000 Max. :9.000 Max. :9.000
## MINK7512 MINK123M MINKGEM MKOOPKLA
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:3.000
## Median :0.0000 Median :0.0000 Median :4.000 Median :4.000
## Mean :0.7961 Mean :0.2027 Mean :3.784 Mean :4.236
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:4.000 3rd Qu.:6.000
## Max. :9.0000 Max. :9.0000 Max. :9.000 Max. :8.000
## PWAPART PWABEDR PWALAND PPERSAUT
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00
## Median :0.0000 Median :0.00000 Median :0.00000 Median :5.00
## Mean :0.7712 Mean :0.04002 Mean :0.07162 Mean :2.97
## 3rd Qu.:2.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:6.00
## Max. :3.0000 Max. :6.00000 Max. :4.00000 Max. :8.00
## PBESAUT PMOTSCO PVRAAUT PAANHANG
## Min. :0.00000 Min. :0.0000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.000000 Median :0.00000
## Mean :0.04827 Mean :0.1754 Mean :0.009447 Mean :0.02096
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :7.00000 Max. :7.0000 Max. :9.000000 Max. :5.00000
## PTRACTOR PWERKT PBROM PLEVEN
## Min. :0.00000 Min. :0.00000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.000 Median :0.0000
## Mean :0.09258 Mean :0.01305 Mean :0.215 Mean :0.1948
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :6.00000 Max. :6.00000 Max. :6.000 Max. :9.0000
## PPERSONG PGEZONG PWAOREG PBRAND
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :2.000
## Mean :0.01374 Mean :0.01529 Mean :0.02353 Mean :1.828
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:4.000
## Max. :6.00000 Max. :3.00000 Max. :7.00000 Max. :8.000
## PZEILPL PPLEZIER PFIETS PINBOED
## Min. :0.0000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.0008588 Mean :0.01889 Mean :0.02525 Mean :0.01563
## 3rd Qu.:0.0000000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :3.0000000 Max. :6.00000 Max. :1.00000 Max. :6.00000
## PBYSTAND AWAPART AWABEDR AWALAND
## Min. :0.00000 Min. :0.000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.000 Median :0.00000 Median :0.00000
## Mean :0.04758 Mean :0.403 Mean :0.01477 Mean :0.02061
## 3rd Qu.:0.00000 3rd Qu.:1.000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :5.00000 Max. :2.000 Max. :5.00000 Max. :1.00000
## APERSAUT ABESAUT AMOTSCO AVRAAUT
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :1.0000 Median :0.00000 Median :0.00000 Median :0.000000
## Mean :0.5622 Mean :0.01048 Mean :0.04105 Mean :0.002233
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :7.0000 Max. :4.00000 Max. :8.00000 Max. :3.000000
## AAANHANG ATRACTOR AWERKT ABROM
## Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.000000 Median :0.00000
## Mean :0.01254 Mean :0.03367 Mean :0.006183 Mean :0.07042
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :3.00000 Max. :4.00000 Max. :6.000000 Max. :2.00000
## ALEVEN APERSONG AGEZONG AWAOREG
## Min. :0.00000 Min. :0.000000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.00000 Median :0.000000 Median :0.000000 Median :0.000000
## Mean :0.07661 Mean :0.005325 Mean :0.006527 Mean :0.004638
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :8.00000 Max. :1.000000 Max. :1.000000 Max. :2.000000
## ABRAND AZEILPL APLEZIER AFIETS
## Min. :0.0000 Min. :0.0000000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :1.0000 Median :0.0000000 Median :0.000000 Median :0.00000
## Mean :0.5701 Mean :0.0005153 Mean :0.006012 Mean :0.03178
## 3rd Qu.:1.0000 3rd Qu.:0.0000000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :7.0000 Max. :1.0000000 Max. :2.000000 Max. :3.00000
## AINBOED ABYSTAND Purchase
## Min. :0.000000 Min. :0.00000 No :5474
## 1st Qu.:0.000000 1st Qu.:0.00000 Yes: 348
## Median :0.000000 Median :0.00000
## Mean :0.007901 Mean :0.01426
## 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :2.000000 Max. :2.00000
# Count how many customers did ("Yes") and did not ("No") purchase a caravan policy.
x <- table(Caravan$Purchase)
x # print the counts
##
## No Yes
## 5474 348
# yes= 348 no= 5474
# Percentage of customers who purchased a caravan policy. It is derived from the
# table itself (instead of typing 348 and 5822 by hand) so the figure stays
# correct if the data ever changes.
100 * x[["Yes"]] / sum(x)
## [1] 5.977327
# i.e. a proportion of 0.05977, or 5.977%: out of 5822 customers only 348 bought a
# caravan insurance policy.
# One bar colour per outcome, in table order (No, Yes).
colors <- c("blue", "red")
col <- colors # alias kept as in the original script; not used in the visible code
# Bar plot of the purchase counts.
barplot(x, main = "Customers of Caravan Policy", col = colors, xlab = "Caravan Policy")
box() # draw a frame around the plot

# NOW WE WANT TO PLOT PURCHASE OF CARAVAN POLICY BY CUSTOMERS AGAINST PRODUCT USAGE (POLICY OWNERSHIP) DATA VARIABLES
#VARIABLE 1-NUMBER OF BOAT POLICIES
# Number-of-boat-policies (APLEZIER) counts, restricted to the customers who
# purchased a caravan policy.
x <- table(Caravan$APLEZIER[Caravan$Purchase == "Yes"])
x
##
## 0 1 2
## 335 12 1
# One colour per observed level, so the palette always matches the number of bars.
barplot(x, col = rainbow(length(x)),
  main = "PURCHASE OF CARAVAN POLICY vs NUMBER OF BOAT POLICIES",
  ylab = "Number of Customers",
  xlab = "Number of Boat Policies")

# Among the 348 customers who purchased the Caravan policy, 335 own no boat policy
# and 13 own one or more boat policies.
# Note: these are counts within purchasers only, so they describe who the buyers are rather than
# how likely each group is to buy; comparing purchase rates per group would also need the non-purchasers.
# 2 NUMBER OF SOCIAL SECURITY INSURANCE POLICIES
# Number-of-social-security-policies (ABYSTAND) counts, restricted to the
# customers who purchased a caravan policy.
x <- table(Caravan$ABYSTAND[Caravan$Purchase == "Yes"])
x
##
## 0 1
## 332 16
# One colour per observed level, so the palette always matches the number of bars.
barplot(x, col = rainbow(length(x)),
  main = "PURCHASE OF CARAVAN POLICY vs NUMBER OF SOCIAL SECURITY INSURANCE POLICIES",
  ylab = "Number of Customers",
  xlab = "Number of Social Security Ins policies")

# Of the 348 caravan-policy purchasers, 332 hold no Social Security insurance policy and 16 hold one.
# These counts cover purchasers only: they show the make-up of our buyers,
# not the purchase likelihood of each group.
# 3 Number of Property Insurance policies
# Property-insurance policy counts (AINBOED) for caravan-policy purchasers only.
x <- with(Caravan, table(AINBOED[Purchase == "Yes"]))
x
##
## 0 1
## 343 5
barplot(
  height = x,
  main = "PURCHASE OF CARAVAN POLICY vs NUMBER OF PROPERTY INS POLICY",
  xlab = "Number of Property Ins Policies",
  ylab = "Number of Customers",
  col = rainbow(2)
)

# Of the 348 caravan-policy purchasers, 343 have no Property insurance policy and 5 have one.
# These counts cover purchasers only, so they describe our buyers rather than each group's likelihood of buying.
# 4 Contribution to car policies
# Car-policy contribution band (PPERSAUT) counts for caravan-policy purchasers only.
x <- table(subset(Caravan, Purchase == "Yes")$PPERSAUT)
x
##
## 0 5 6
## 72 14 262
barplot(
  height = x,
  xlab = "Contributions to Car Policies",
  ylab = "Number of Customers",
  main = "PURCHASE OF CARAVAN POLICY vs CONTRIBUTION TO CAR POLICIES",
  col = rainbow(3)
)

# Most caravan-policy purchasers (262 of 348) pay a car policy contribution in band 6 (f 1000 - 4999, see L4);
# 72 pay no car policy contribution and 14 are in band 5 (f 500 - 999).
# 5 Number of fire policies
# Number-of-fire-policies (ABRAND) counts for caravan-policy purchasers only.
x <- table(Caravan[Caravan$Purchase == "Yes", "ABRAND"])
x
##
## 0 1 2
## 109 232 7
barplot(
  height = x,
  main = "PURCHASE OF CARAVAN POLICY vs NUMBER OF FIRE POLICY",
  ylab = "Number of Customers",
  xlab = "Number of fire policies",
  col = rainbow(3)
)

# Most caravan-policy purchasers hold exactly one Fire policy (232 of 348);
# 109 hold none and 7 hold two.
# 6 Number of Life policies
# Number-of-life-insurances (ALEVEN) counts for caravan-policy purchasers only.
x <- table(Caravan[["ALEVEN"]][Caravan[["Purchase"]] == "Yes"])
x
##
## 0 1 2 3 4
## 325 8 10 2 3
barplot(
  height = x,
  main = "PURCHASE OF CARAVAN POLICY vs NUMBER OF LIFE POLICY",
  ylab = "Number of Customers",
  xlab = "Number of Life policies",
  col = rainbow(5)
)
box() # frame the plot

#Most caravan-policy purchasers hold no life policy (325 of 348), followed by those holding one or two (18 in total)
##CHARTS SHOWING PURCHASE OF CARAVAN POLICY BY CUSTOMERS AGAINST SOCIODEMOGRAPHIC DATA VARIABLES##
#1 CUSTOMER SUBTYPE
# Customer-subtype (MOSTYPE, legend L0) counts for caravan-policy purchasers only.
x <- table(Caravan$MOSTYPE[Caravan$Purchase == "Yes"])
x
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 20 22 23 24 25 26 27 29 30 31 32 33 34
## 13 6 25 2 2 12 3 51 12 9 9 16 13 2 4 4 5 2 1 1 2 4 6 8 46 9
## 35 36 37 38 39 41
## 8 16 10 23 19 5
# Only the subtypes that actually occur among purchasers are plotted, so the
# palette is sized from the table rather than from the 41 subtypes of the legend.
# Title and axis labels are added to match the other charts in this script.
barplot(x, col = rainbow(length(x)),
  main = "PURCHASE OF CARAVAN POLICY BY CUSTOMER SUBTYPE",
  xlab = "Customer Subtype",
  ylab = "Number of Customers")

# Among purchasers, the largest customer subtypes are 8 (Middle class families, 51) and 33 (Lower class large families, 46),
# followed by 3 (High status seniors, 25) and 38 (Traditional families, 23).
#2 AVERAGE AGE
# Average-age band (MGEMLEEF, legend L1) counts for caravan-policy purchasers only.
x <- table(Caravan$MGEMLEEF[Caravan$Purchase == "Yes"])
x
##
## 1 2 3 4 5 6
## 1 87 183 64 12 1
# Label each bar by looking its code up in the L1 legend. Assigning six names
# positionally fails as soon as one age band has no purchasers, because the
# table then has fewer than six entries.
age_labels <- c("1" = "20 to 30", "2" = "30 to 40", "3" = "40 to 50",
                "4" = "50 to 60", "5" = "60 to 70", "6" = "70 to 80")
names(x) <- unname(age_labels[names(x)])
barplot(x, col = rainbow(length(x)),
  main = "PURCHASE OF CARAVAN POLICY BY AVERAGE AGE",
  xlab = "AGE GROUP",
  ylab = "Number of Customers")

#Most caravan-policy purchasers fall in the 40-50 average-age band (183 of 348),
#followed by the 30-40 band (87).
#3 Purchasing power class
# Purchasing-power-class (MKOOPKLA) counts for caravan-policy purchasers only.
x <- table(Caravan[Caravan$Purchase == "Yes", "MKOOPKLA"])
x
##
## 1 2 3 4 5 6 7 8
## 18 15 71 46 30 66 67 35
barplot(
  height = x,
  main = "PURCHASE OF CARAVAN POLICY BY PURCHASING POWER CLASS",
  ylab = "Number of Customers",
  xlab = "PURCHASING POWER CLASS",
  col = rainbow(8)
)

#Among purchasers, purchasing power class 3 is the most common (71), followed by class 7 (67) and class 6 (66).
# Note: these codes are purchasing power classes (MKOOPKLA), not the customer subtypes of legend L0, so the L0 labels do not apply here.
#4 AVERAGE INCOME
# Average-income code (MINKGEM) counts for caravan-policy purchasers only.
x <- table(Caravan$MINKGEM[Caravan$Purchase == "Yes"])
x
##
## 1 2 3 4 5 6 7 8
## 1 20 69 139 70 24 17 8
# Label each bar by looking its code up, instead of assigning eight names
# positionally (which fails when a code has no purchasers). Codes without a label
# keep their raw value.
# NOTE(review): these labels reuse the L4 contribution bands with a "$" prefix; the
# data dictionary in this file does not state that MINKGEM uses that scale - confirm.
income_labels <- c("1" = "$1 to $49", "2" = "$50 to $99", "3" = "$100 to $199",
                   "4" = "$200 to $499", "5" = "$500 to $999", "6" = "$1000 to $4999",
                   "7" = "$5000 to $9999", "8" = "$10.000 to $19.999")
bar_labels <- unname(income_labels[names(x)])
names(x) <- ifelse(is.na(bar_labels), names(x), bar_labels)
barplot(x, col = rainbow(length(x)),
  main = "PURCHASE OF CARAVAN POLICY BY AVERAGE INCOME", ylab = "Number of Customers",
  xlab = "Income groups")
box() # frame the plot

# The bar chart shows that most purchasers fall in average-income group 4 (labelled $200 to $499; 139 customers),
# followed by group 5 ($500 to $999; 70) and group 3 ($100 to $199; 69).
#Mini Conclusion based on graphs.#
#There are 5822 Customers and only 348 of them Purchased The Caravan Insurance Policy
#That is around 6% of the total Customers if we round off.
#Amongst this 6% we find that it is made up of Customers who do not own any boat policies,
#no Social Security policies,
#no Property Insurance policies
#and pay a car policy premium averagely from $1000 to $4999.
#Have only one Fire policy,
#and they are of Customer Subtypes :Middle Class families ,Lower Class Large Fam,
#followed by High status Seniours and Traditional Families,
# with their respective age groups 40-50, followed by 30-40.
#By purchasing power class, class 3 is the most common among purchasers,
# followed by classes 7 and 6.
#These are the characteristics that make up the customer base of interest for
# our Caravan Insurance Policy
#Modeling
library(rpart)
library(rattle)
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.4.0 Copyright (c) 2006-2020 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(rpart.plot)
library(RColorBrewer)
library(crossval)
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(vcd)
## Loading required package: grid
##
## Attaching package: 'vcd'
## The following object is masked from 'package:ISLR':
##
## Hitters
library(Metrics)
# Aliases of the data set used by the modelling steps below.
D1 <- Caravan
D1.NEW <- D1
d1.ori <- D1.NEW
# Reproducible 50/50 split: half of the row names are sampled for training,
# and every row not sampled forms the test set.
set.seed(99)
tr <- d1.ori[sample(row.names(d1.ori), size = round(nrow(d1.ori) * 0.5)), ]
te <- d1.ori[!(row.names(d1.ori) %in% row.names(tr)), ]
# Working copies of the training and test data
tr1 <- tr
te1 <- te
te2 <- te
# ZeroR baseline: predict that no one will purchase. It is stored as a factor with
# the same levels as the real outcome (rather than numeric 0) so that it can be
# compared directly with te1$Purchase.
te2$Purchase <- factor(rep("No", nrow(te2)), levels = levels(te1$Purchase))
# Build the classification tree on all predictors. rpart.control(): minsplit is the
# minimum node size for a split to be attempted, minbucket the minimum size of a
# terminal node, cp the complexity threshold a split must reach.
tr1$Purchase <- as.factor(tr1$Purchase) # guard: rpart needs a factor response for classification
fit1 <- rpart(formula = Purchase ~ ., data = tr1,
  control = rpart.control(minsplit = 600, minbucket = 1, cp = .0008))
fit1
## n= 2911
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 2911 175 No (0.93988320 0.06011680)
## 2) PPERSAUT< 5.5 1707 47 No (0.97246632 0.02753368) *
## 3) PPERSAUT>=5.5 1204 128 No (0.89368771 0.10631229)
## 6) MOPLLAAG>=4.5 630 35 No (0.94444444 0.05555556)
## 12) PPLEZIER< 0.5 623 32 No (0.94863563 0.05136437)
## 24) MSKD< 6.5 622 31 No (0.95016077 0.04983923)
## 48) ALEVEN< 3.5 621 30 No (0.95169082 0.04830918) *
## 49) ALEVEN>=3.5 1 0 Yes (0.00000000 1.00000000) *
## 25) MSKD>=6.5 1 0 Yes (0.00000000 1.00000000) *
## 13) PPLEZIER>=0.5 7 3 No (0.57142857 0.42857143) *
## 7) MOPLLAAG< 4.5 574 93 No (0.83797909 0.16202091) *
# Run a garbage collection and print the memory-usage report.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1285654 68.7 2313118 123.6 2313118 123.6
## Vcells 3168678 24.2 8388608 64.0 7586520 57.9
# Decorated plot of the fitted tree (rattle).
fancyRpartPlot(model = fit1)

# Split variables appearing in the tree:
#   PPERSAUT - Contribution car policies
#   MOPLLAAG - Lower level education
#   PPLEZIER - Contribution boat policies
#   MSKD     - Social class D
#   ALEVEN   - Number of life insurances
# Same tree drawn with the plain rpart plot method, then annotated with its labels.
plot(x = fit1)
text(x = fit1)

# Complexity parameter of the cptable row with the smallest cross-validated error.
best_row <- which.min(fit1$cptable[, "xerror"])
fit1$cptable[best_row, "CP"]
## [1] 0.002285714
# Predict the class of every held-out customer with the fitted tree.
Prediction <- predict(fit1, newdata = te1, type = "class")
# Compare with the base model: score the ZeroR baseline ("nobody purchases") before
# the placeholder column in te2 is overwritten, so the tree has something to beat.
# With the test split recorded in this file (2738 No / 173 Yes) this is 173/2911.
baseline_error <- mean(te1$Purchase != "No")
baseline_error
# Store the tree's predictions alongside the test features.
te2$Purchase <- Prediction
# Relabel predicted and actual outcomes with the same readable class names;
# both are already factors, so a single factor() call is enough.
Pred <- factor(te2$Purchase, levels = c("No", "Yes"),
  labels = c("Not Purchased", "Purchased"))
Actual <- factor(te1$Purchase, levels = c("No", "Yes"),
  labels = c("Not Purchased", "Purchased"))
# Class balance of the held-out half.
table(te1[["Purchase"]])
##
## No Yes
## 2738 173
# Confusion-matrix counts, with "Not Purchased" as the negative class.
cm1 <- confusionMatrix(a = Actual, p = Pred, negative = "Not Purchased")
cm1
## FP TP TN FN
## 1 1 2737 172
## attr(,"negative")
## [1] "Not Purchased"
# Accuracy, sensitivity, specificity and related measures computed from cm1.
diagnosticErrors(cm1)
## acc sens spec ppv npv lor
## 0.940570251 0.005780347 0.999634770 0.500000000 0.940873152 2.767123232
## attr(,"negative")
## [1] "Not Purchased"
# Classification error: share of test customers whose predicted label differs from the actual one.
ce(actual = Actual, predicted = Pred)
## [1] 0.05942975