# Directory that holds the input CSV files. The default is the location the
# script was originally written for; set the PROJECT_DATA_DIR environment
# variable to point somewhere else without editing the script.
data_dir <- Sys.getenv(
  "PROJECT_DATA_DIR",
  unset = "C:/Users/SUBARNA/Desktop/DATA INTERN ACTIVITY"
)
# Only change directory when the path exists. An unconditional setwd() aborts
# the script on every machine that lacks this exact folder; without it the
# CSV files are looked up in the current working directory instead.
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
library(readr)
# Policy-level data: one row per policy, raw and dummy-coded predictors plus
# the loss columns (see the parsed column specification below).
project <- read_csv("project.csv")
## Parsed with column specification:
## cols(
## `Policy Number` = col_integer(),
## Age = col_integer(),
## `Age Interval` = col_character(),
## `Average Age` = col_double(),
## `Years of Driving Experience` = col_integer(),
## `Driving Experience Interval` = col_character(),
## `Avg Driving Experience` = col_double(),
## `Number of Vehicles` = col_integer(),
## Gender = col_character(),
## `Gender Dummy` = col_integer(),
## Married = col_character(),
## `Dummy Married` = col_integer(),
## `Vehicle Age` = col_integer(),
## `Vehicle Age Interval` = col_character(),
## `Avg Vehicle Age` = col_double(),
## Fuel = col_character(),
## `Dummy Fuel` = col_integer(),
## Losses = col_integer(),
## `Capped Losses` = col_integer()
## )
# Per-column overview of the loaded policy data: quartiles and mean for the
# numeric columns, length/class/mode for the character columns.
summary(project)
## Policy Number Age Age Interval Average Age
## Min. :100002 Min. :16.00 Length:15290 Min. :21.50
## 1st Qu.:124842 1st Qu.:24.00 Class :character 1st Qu.:21.50
## Median :149872 Median :42.00 Mode :character Median :45.50
## Mean :149910 Mean :42.33 Mean :42.71
## 3rd Qu.:175011 3rd Qu.:61.00 3rd Qu.:57.50
## Max. :200000 Max. :70.00 Max. :69.50
## Years of Driving Experience Driving Experience Interval
## Min. : 0.00 Length:15290
## 1st Qu.: 6.00 Class :character
## Median :23.00 Mode :character
## Mean :23.73
## 3rd Qu.:42.00
## Max. :53.00
## Avg Driving Experience Number of Vehicles Gender
## Min. : 5.50 Min. :1.000 Length:15290
## 1st Qu.: 5.50 1st Qu.:2.000 Class :character
## Median :17.50 Median :2.000 Mode :character
## Mean :24.46 Mean :2.496
## 3rd Qu.:41.50 3rd Qu.:3.000
## Max. :53.50 Max. :4.000
## Gender Dummy Married Dummy Married Vehicle Age
## Min. :0.0000 Length:15290 Min. :0.000 Min. : 0.000
## 1st Qu.:0.0000 Class :character 1st Qu.:0.000 1st Qu.: 6.000
## Median :0.0000 Mode :character Median :0.000 Median : 9.000
## Mean :0.4933 Mean :0.491 Mean : 8.656
## 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:12.000
## Max. :1.0000 Max. :1.000 Max. :15.000
## Vehicle Age Interval Avg Vehicle Age Fuel Dummy Fuel
## Length:15290 Min. : 1.500 Length:15290 Min. :0.0000
## Class :character 1st Qu.: 5.500 Class :character 1st Qu.:1.0000
## Mode :character Median : 9.500 Mode :character Median :1.0000
## Mean : 8.577 Mean :0.7632
## 3rd Qu.:13.500 3rd Qu.:1.0000
## Max. :13.500 Max. :1.0000
## Losses Capped Losses
## Min. : 13.0 Min. : 13.0
## 1st Qu.: 226.0 1st Qu.: 226.0
## Median : 355.0 Median : 355.0
## Mean : 389.9 Mean : 389.9
## 3rd Qu.: 489.0 3rd Qu.: 489.0
## Max. :3500.0 Max. :3500.0
# One-way frequency tables for the three categorical predictors.

# Number of policies by gender.
mytable <- table(Gender = project$Gender)
mytable
## Gender
## F M
## 7747 7543

# Number of policies by marital status.
mytable1 <- table(Married = project$Married)
mytable1
## Married
## Married Single
## 7783 7507

# Number of policies by fuel type.
mytable2 <- table(Fuel = project$Fuel)
mytable2
## Fuel
## D P
## 3620 11670
# Two-way contingency tables. Note that `mytable` and `mytable1` are
# re-assigned here, replacing the one-way tables stored under those names.

# Gender (rows) by marital status (columns).
mytable <- xtabs(
  formula = ~ Gender + Married,
  data = project
)
mytable
## Married
## Gender Married Single
## F 3974 3773
## M 3809 3734

# Gender (rows) by fuel type (columns).
mytable1 <- xtabs(
  formula = ~ Gender + Fuel,
  data = project
)
mytable1
## Fuel
## Gender D P
## F 1297 6450
## M 2323 5220
# ---- Exploratory plots of the dependent variable (capped losses) ----
library(lattice)
library(car) # loaded once; re-loading it before every plot was redundant

# Distribution of the dependent variable.
histogram(
  project$`Capped Losses`,
  col = "green",
  main = "Distribution of dependent variable-capped loss",
  xlab = "LOSS"
)

# Capped losses against each numeric predictor. The predictor goes on the
# x-axis and the loss on the y-axis so that the fitted line drawn by
# car::scatterplot() models the loss as a function of the predictor (the
# earlier argument order regressed the predictor on the loss).
scatterplot(
  project$Age, project$`Capped Losses`,
  xlab = "Age", ylab = "Capped Losses"
)
scatterplot(
  project$`Years of Driving Experience`, project$`Capped Losses`,
  xlab = "Years of Driving Experience", ylab = "Capped Losses"
)
scatterplot(
  project$`Vehicle Age`, project$`Capped Losses`,
  xlab = "Vehicle Age", ylab = "Capped Losses"
)

# Capped losses by each categorical predictor. Columns are resolved through
# `data = project` instead of attach(project): attaching the same data frame
# three times stacked duplicate copies on the search path and produced the
# "objects are masked" messages, without changing what the plots show.
boxplot(
  `Capped Losses` ~ Gender,
  data = project, horizontal = TRUE, col = c("green", "yellow")
)
boxplot(
  `Capped Losses` ~ Fuel,
  data = project, horizontal = TRUE, col = c("green", "yellow")
)
boxplot(
  `Capped Losses` ~ Married,
  data = project, horizontal = TRUE, col = c("blue", "red")
)
# Load the second CSV. Per the parsed column specification below it contains
# only numeric columns (dummy-coded predictors and the two loss columns); it is
# the input for the correlation matrix and corrgram that follow.
library(readr)
project_1_ <- read_csv("project (1).csv")
## Parsed with column specification:
## cols(
## `Policy Number` = col_integer(),
## Years_Drv_Exp = col_integer(),
## Number_Vehicles = col_integer(),
## Average_Age = col_double(),
## Gender_Dummy = col_integer(),
## Married_Dummy = col_integer(),
## Avg_Veh_Age = col_double(),
## Fuel_Type_Dummy = col_integer(),
## Losses = col_integer(),
## Capped_Losses = col_integer()
## )
# Open the data viewer only in an interactive session: View() needs a data
# viewer/GUI and can fail when the script is run in batch mode.
if (interactive()) {
  View(project_1_)
}
# Spearman rank correlation between every pair of columns in the numeric data
# set (this includes the `Policy Number` identifier column).
cor(project_1_, method = "spearman")
## Policy Number Years_Drv_Exp Number_Vehicles Average_Age
## Policy Number 1.0000000000 -0.0009310377 0.013106220 0.0001843231
## Years_Drv_Exp -0.0009310377 1.0000000000 0.009018831 0.8600265733
## Number_Vehicles 0.0131062196 0.0090188313 1.000000000 0.0072444329
## Average_Age 0.0001843231 0.8600265733 0.007244433 1.0000000000
## Gender_Dummy -0.0092164463 0.0077617030 -0.000494106 0.0031516298
## Married_Dummy -0.0050710423 0.0037748168 0.008667255 0.0006892495
## Avg_Veh_Age -0.0041069027 -0.2488155179 0.002416690 -0.2958503024
## Fuel_Type_Dummy 0.0024114611 -0.3073926188 -0.004138974 -0.3494663720
## Losses 0.0023131224 -0.4458863986 -0.002800558 -0.5135744770
## Capped_Losses 0.0023165384 -0.4458965553 -0.002786702 -0.5135878633
## Gender_Dummy Married_Dummy Avg_Veh_Age Fuel_Type_Dummy
## Policy Number -0.0092164463 -0.0050710423 -0.0041069027 0.002411461
## Years_Drv_Exp 0.0077617030 0.0037748168 -0.2488155179 -0.307392619
## Number_Vehicles -0.0004941060 0.0086672549 0.0024166896 -0.004138974
## Average_Age 0.0031516298 0.0006892495 -0.2958503024 -0.349466372
## Gender_Dummy 1.0000000000 0.0080018585 -0.0004511152 0.165300363
## Married_Dummy 0.0080018585 1.0000000000 0.0049955689 0.218715952
## Avg_Veh_Age -0.0004511152 0.0049955689 1.0000000000 -0.279067420
## Fuel_Type_Dummy 0.1653003627 0.2187159515 -0.2790674200 1.000000000
## Losses 0.1724544454 0.2689139694 -0.3239689255 0.735397504
## Capped_Losses 0.1724648397 0.2689100423 -0.3239391063 0.735414990
## Losses Capped_Losses
## Policy Number 0.002313122 0.002316538
## Years_Drv_Exp -0.445886399 -0.445896555
## Number_Vehicles -0.002800558 -0.002786702
## Average_Age -0.513574477 -0.513587863
## Gender_Dummy 0.172454445 0.172464840
## Married_Dummy 0.268913969 0.268910042
## Avg_Veh_Age -0.323968925 -0.323939106
## Fuel_Type_Dummy 0.735397504 0.735414990
## Losses 1.000000000 0.999999448
## Capped_Losses 0.999999448 1.000000000
# Correlogram of the numeric data set: shaded cells below the diagonal, pie
# glyphs above it, variable names on the diagonal; `order = TRUE` asks
# corrgram to reorder the variables.
library(corrgram)
corrgram(
  project_1_,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram "
)
# Hypothesis 1: the fuel type used differs between male and female customers.

# Fuel type (rows) by gender (columns).
mytable5 <- xtabs(~ Fuel + Gender, data = project)
mytable5
## Gender
## Fuel F M
## D 1297 2323
## P 6450 5220
# Same table with row and column totals.
addmargins(mytable5)
## Gender
## Fuel F M Sum
## D 1297 2323 3620
## P 6450 5220 11670
## Sum 7747 7543 15290
# Chi-squared test of independence between fuel type and gender.
chisq.test(mytable5)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: mytable5
## X-squared = 417.01, df = 1, p-value < 2.2e-16
# Conclusion: independence is rejected (p-value < 2.2e-16), so there is a
# significant difference between the fuel used by male and female customers.
#
# NOTE: an earlier revision also ran t.test(mytable5) at this point. Given a
# contingency table, t.test() performs a one-sample t-test of the four cell
# counts against a mean of 0 (df = 3, "mean of x" = 3822.5 = 15290 / 4), which
# says nothing about this hypothesis, so the call and its output were removed.
# Hypothesis 2: the loss made by female customers is higher than the loss made
# by male customers.
#
# Welch two-sample t-test on the per-policy losses. `Gender` holds the values
# "F" and "M", which t.test() orders alphabetically, so
# alternative = "greater" tests mean(Losses | F) > mean(Losses | M).
t.test(Losses ~ Gender, data = project, alternative = "greater")
# NOTE(review): the output previously pasted here came from
# t.test(xtabs(~ Losses + Gender)), i.e. a one-sample t-test of the 2440 cell
# counts of a Losses-by-Gender table against 0 (df = 2439, "mean of x" =
# 6.266393 = 15290 / 2440). It did not compare losses between the groups and
# has been removed; re-run this chunk to obtain the valid output.
# Original conclusion (drawn from the invalid test - confirm against the new
# output): there is a significant difference between the loss made by male and
# female customers.
# Hypothesis 3: the loss made by diesel vehicles is higher than the loss made
# by petrol vehicles.
#
# Welch two-sample t-test on the per-policy losses. `Fuel` holds the values
# "D" and "P", which t.test() orders alphabetically, so
# alternative = "greater" tests mean(Losses | D) > mean(Losses | P).
t.test(Losses ~ Fuel, data = project, alternative = "greater")
# NOTE(review): the output previously pasted here came from
# t.test(xtabs(~ Losses + Fuel)), i.e. a one-sample t-test of the 2440 cell
# counts of a Losses-by-Fuel table against 0 (df = 2439, "mean of x" =
# 6.266393 = 15290 / 2440). It did not compare losses between the groups and
# has been removed; re-run this chunk to obtain the valid output.
# Original conclusion (drawn from the invalid test - confirm against the new
# output): there is a significant difference between the loss made by
# customers using different fuel types.
# Hypothesis 4: the loss made by married customers is less than the loss made
# by single customers.
#
# Welch two-sample t-test on the per-policy losses. `Married` holds the values
# "Married" and "Single", which t.test() orders alphabetically, so
# alternative = "less" tests mean(Losses | Married) < mean(Losses | Single).
t.test(Losses ~ Married, data = project, alternative = "less")
# NOTE(review): the output previously pasted here came from
# t.test(xtabs(~ Losses + Married)), i.e. a one-sample t-test of the 2440 cell
# counts of a Losses-by-Married table against 0 (df = 2439, "mean of x" =
# 6.266393 = 15290 / 2440). It did not compare losses between the groups and
# has been removed; re-run this chunk to obtain the valid output.
# Original conclusion (drawn from the invalid test - confirm against the new
# output): there is a significant difference between the married and the
# single insurance holders.