# Load the customer data from the comma-separated file (path is relative to
# the working directory) and preview the first rows.
mydata <- read.table(file = "TravelInsurancePrediction.csv", sep = ",")
head(x = mydata)
##   Age              Employment.Type GraduateOrNot AnnualIncome FamilyMembers
## 0  31            Government Sector           Yes       400000             6
## 1  31 Private Sector/Self Employed           Yes      1250000             7
## 2  34 Private Sector/Self Employed           Yes       500000             4
## 3  28 Private Sector/Self Employed           Yes       700000             3
## 4  28 Private Sector/Self Employed           Yes       700000             8
## 5  25 Private Sector/Self Employed            No      1150000             4
##   ChronicDiseases FrequentFlyer EverTravelledAbroad TravelInsurance
## 0               1            No                  No               0
## 1               0            No                  No               0
## 2               1            No                  No               1
## 3               1            No                  No               0
## 4               1           Yes                  No               0
## 5               0            No                  No               0

#Description of data: - unit of observation: one customer - sample size: 1987 units - variables (unit of measurement) – Age - Age Of The Customer (in years) – Employment Type - The Sector In Which The Customer Is Employed – GraduateOrNot - Whether The Customer Is A College Graduate Or Not – AnnualIncome - The Yearly Income Of The Customer In Indian Rupees [Rounded To The Nearest 50 Thousand Rupees] – FamilyMembers - Number Of Members In The Customer’s Family – ChronicDiseases - Whether The Customer Suffers From Any Major Disease Or Condition Like Diabetes, High BP Or Asthma, etc. – FrequentFlyer - Derived Data Based On The Customer’s History Of Booking Air Tickets On At Least 4 Different Instances In The Last 2 Years [2017-2019]. – EverTravelledAbroad - Has The Customer Ever Travelled To A Foreign Country [Not Necessarily Using The Company’s Services] – TravelInsurance - Did The Customer Buy The Travel Insurance Package During The Introductory Offering Held In The Year 2019.

#Source of data Kaggle: https://www.kaggle.com/datasets/tejashvi14/travel-insurance-prediction-data?select=TravelInsurancePrediction.csv

#Main goal of analysis (RQ) To get a sense of the data (the variables) and to assess the correlation between the variables and the purchase of travel insurance, in order to better recognise the target segment of consumers, i.e. which customers would be interested in buying the insurance package that includes Covid cover and what the main indicators are.

#Data manipulations Changing yes/no to 0 and 1 for FrequentFlyer variable

# Recode FrequentFlyer from "Yes"/"No" text to a numeric indicator
# (1 = "Yes", 0 = any other non-missing value), then preview the result.
mydata$FrequentFlyer <- as.numeric(mydata$FrequentFlyer == "Yes")

head(x = mydata)
##   Age              Employment.Type GraduateOrNot AnnualIncome FamilyMembers
## 0  31            Government Sector           Yes       400000             6
## 1  31 Private Sector/Self Employed           Yes      1250000             7
## 2  34 Private Sector/Self Employed           Yes       500000             4
## 3  28 Private Sector/Self Employed           Yes       700000             3
## 4  28 Private Sector/Self Employed           Yes       700000             8
## 5  25 Private Sector/Self Employed            No      1150000             4
##   ChronicDiseases FrequentFlyer EverTravelledAbroad TravelInsurance
## 0               1             0                  No               0
## 1               0             0                  No               0
## 2               1             0                  No               1
## 3               1             0                  No               0
## 4               1             1                  No               0
## 5               0             0                  No               0

Order by Annual Income

# Sort the customers by annual income in ascending order and preview the
# lowest-income rows.
mydata1.1 <- mydata[order(mydata[["AnnualIncome"]]), ]

head(x = mydata1.1)
##    Age   Employment.Type GraduateOrNot AnnualIncome FamilyMembers
## 31  31 Government Sector            No       300000             4
## 36  31 Government Sector            No       300000             9
## 53  28 Government Sector           Yes       300000             2
## 69  31 Government Sector            No       300000             4
## 87  28 Government Sector           Yes       300000             6
## 89  28 Government Sector           Yes       300000             8
##    ChronicDiseases FrequentFlyer EverTravelledAbroad TravelInsurance
## 31               0             0                  No               0
## 36               1             0                  No               0
## 53               0             0                  No               0
## 69               0             0                  No               0
## 87               0             0                  No               0
## 89               0             0                  No               0
# Preview the last rows of the income-sorted table (the highest incomes).
tail(x = mydata1.1)
##      Age              Employment.Type GraduateOrNot AnnualIncome FamilyMembers
## 1955  33            Government Sector           Yes      1750000             4
## 1983  28 Private Sector/Self Employed           Yes      1750000             5
## 346   25 Private Sector/Self Employed           Yes      1800000             7
## 1123  25 Private Sector/Self Employed           Yes      1800000             6
## 1650  25 Private Sector/Self Employed           Yes      1800000             4
## 1823  25 Private Sector/Self Employed           Yes      1800000             6
##      ChronicDiseases FrequentFlyer EverTravelledAbroad TravelInsurance
## 1955               0             0                  No               0
## 1983               1             0                 Yes               0
## 346                1             1                  No               1
## 1123               1             1                  No               1
## 1650               1             1                  No               1
## 1823               0             1                  No               1

Table with only numeric data

# Keep only the numeric (non-indicator) variables. The columns are selected by
# name rather than by position (previously 1, 4 and 5), so the subset stays
# correct - or fails with an error - if the column order of the input changes.
mydata2 <- mydata[c("Age", "AnnualIncome", "FamilyMembers")]

head(mydata2)
##   Age AnnualIncome FamilyMembers
## 0  31       400000             6
## 1  31      1250000             7
## 2  34       500000             4
## 3  28       700000             3
## 4  28       700000             8
## 5  25      1150000             4

#Descriptive statistics and graphical presentation

# Five-number summary and mean of each numeric variable.
summary(object = mydata2)
##       Age         AnnualIncome     FamilyMembers  
##  Min.   :25.00   Min.   : 300000   Min.   :2.000  
##  1st Qu.:28.00   1st Qu.: 600000   1st Qu.:4.000  
##  Median :29.00   Median : 900000   Median :5.000  
##  Mean   :29.65   Mean   : 932763   Mean   :4.753  
##  3rd Qu.:32.00   3rd Qu.:1250000   3rd Qu.:6.000  
##  Max.   :35.00   Max.   :1800000   Max.   :9.000

The age range is from 25 to 35 years. The mean age is 29.65 years and the median is 29 years, i.e. half of the customers are 29 years old or younger and the other half are 29 or older. Annual income of the customers ranges from 300,000 rupees to 1,800,000 rupees. The median annual income is 900,000 rupees, meaning that half of the customers earn 900,000 rupees or less and the other half earn 900,000 rupees or more; the mean is slightly higher at 932,763 rupees. 75 % of customers have an annual income of 1,250,000 rupees or less. On average, customers have 4.75 (approximately 5) family members. The customer with the fewest family members has a family of 2, and the customer with the most family members has a family of 9.

Correlations between numerical variables

# Pairwise Pearson correlations between the numeric variables.
print(cor(x = mydata2, method = "pearson"))
##                       Age AnnualIncome FamilyMembers
## Age            1.00000000  -0.02010149    0.02740866
## AnnualIncome  -0.02010149   1.00000000   -0.01536739
## FamilyMembers  0.02740866  -0.01536739    1.00000000

The correlations between the numerical variables are all very close to zero (|r| < 0.03), so there is practically no linear relationship between any pair of them. The signs are slightly negative between age and annual income (-0.02), slightly positive between age and number of family members (0.03), and slightly negative between number of family members and annual income (-0.02). These values are far too small to conclude that, for example, older customers earn less or have bigger families.

##Histograms … of Age, Annual Income and Number of Family Members

# Frequency histograms of the three numeric variables.

# Age, with the x-axis limits set to 24-36.
hist(
  x = mydata$Age,
  freq = TRUE,
  col = "darkmagenta",
  xlim = c(24, 36),
  xlab = "Age",
  main = "Histogram of Age"
)

# Annual income, asking for roughly 12 bins.
hist(
  x = mydata$AnnualIncome,
  freq = TRUE,
  col = "yellow",
  breaks = 12,
  xlab = "Annual Income",
  main = "Histogram of Annual Income"
)

# Number of family members, using the default binning.
hist(
  x = mydata$FamilyMembers,
  freq = TRUE,
  col = "darkgreen",
  xlab = "Number of Family Members",
  main = "Histogram of Number of Family Members"
)

None of our variables have normal distribution. Number of family members is skewed to the right (positively skewed), unimodal. Histograms of age and annual income are multimodal.

# Mean annual income for customers without (0) and with (1) travel insurance.
aggregate(
  x = mydata$AnnualIncome,
  by = list(mydata$TravelInsurance),
  FUN = mean
)
##   Group.1         x
## 1       0  821299.9
## 2       1 1133239.4

From the above calculation we can observe that on average, customers who took the travel insurance have higher annual income.

# Mean annual income for non-frequent (0) and frequent (1) flyers.
aggregate(
  x = mydata$AnnualIncome,
  by = list(mydata$FrequentFlyer),
  FUN = mean
)
##   Group.1         x
## 1       0  864203.8
## 2       1 1190887.3

From the above calculation we can observe that, on average, customers who are frequent flyers (booked air tickets on at least 4 different instances in the observed period) have a higher income than those who are not.

# Mean age for customers without (0) and with (1) travel insurance.
aggregate(
  x = mydata$Age,
  by = list(mydata$TravelInsurance),
  FUN = mean
)
##   Group.1        x
## 1       0 29.51762
## 2       1 29.88873

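From the above calculation we can observe that the average age of customers who bought the insurance (29.89) is almost the same as that of customers who did not (29.52), so age does not appear to be an important factor in the decision to buy the insurance.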

#Boxplot

# Recode the 0/1 TravelInsurance indicator as a labelled factor.
# `levels` must list the values that actually occur in the column (0 and 1)
# and `labels` the text they are displayed as. The previous call had the two
# swapped (levels = c("No", "Yes")), so no value matched a level and every
# element became NA; its result was also only printed, never stored.
# The factor is kept in its own variable, so `mydata` itself is unchanged.
travel_insurance <- factor(mydata$TravelInsurance,
                           levels = c(0, 1),
                           labels = c("No", "Yes"))
head(mydata)
##   Age              Employment.Type GraduateOrNot AnnualIncome FamilyMembers
## 0  31            Government Sector           Yes       400000             6
## 1  31 Private Sector/Self Employed           Yes      1250000             7
## 2  34 Private Sector/Self Employed           Yes       500000             4
## 3  28 Private Sector/Self Employed           Yes       700000             3
## 4  28 Private Sector/Self Employed           Yes       700000             8
## 5  25 Private Sector/Self Employed            No      1150000             4
##   ChronicDiseases FrequentFlyer EverTravelledAbroad TravelInsurance
## 0               1             0                  No               0
## 1               0             0                  No               0
## 2               1             0                  No               1
## 3               1             0                  No               0
## 4               1             1                  No               0
## 5               0             0                  No               0
#install.packages("tidyverse")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.0 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Box plot of annual income, one box per insurance status.
# - The fill now maps TravelInsurance (recoded inline from 0/1 to "No"/"Yes"),
#   matching the "(un)insured" title; it previously mapped GraduateOrNot.
# - Columns are referenced by bare name inside aes(); ggplot2 looks them up in
#   `mydata`, and using `mydata$...` there is discouraged.
# - The x-axis carries no information for this plot, so its text and ticks
#   are hidden; the groups are identified by the fill legend instead.
ggplot(mydata,
       aes(y = AnnualIncome,
           fill = factor(TravelInsurance,
                         levels = c(0, 1),
                         labels = c("No", "Yes")))) +
  geom_boxplot() +
  ggtitle("Annual Income boxplot for (un)insured") +
  labs(fill = "Travel insurance") +
  ylab("Annual Income") +
  ylim(0, 2000000) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

To conclude, our target (for a campaign) should be the customers who have higher income and are frequent fliers.