Import the dataset that we prepared in Module 2:
library(readr)
## Warning: package 'readr' was built under R version 3.4.4
# Read the prepared claims CSV; readr guesses the column types from the data.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
claimsData <- read_csv("C:/Users/joshu/Desktop/preppedClaimsData.csv")
## Parsed with column specification:
## cols(
## .default = col_integer(),
## RIDIT_01 = col_double(),
## RIDIT_02 = col_double(),
## RIDIT_03 = col_double(),
## RIDIT_04 = col_double(),
## RIDIT_05 = col_double(),
## RIDIT_06 = col_double(),
## RIDIT_07 = col_double(),
## RIDIT_08 = col_double(),
## RIDIT_09 = col_double(),
## RIDIT_10 = col_double(),
## RIDIT_11 = col_double(),
## RIDIT_12 = col_double(),
## RIDIT_13 = col_double(),
## RIDIT_14 = col_double(),
## RIDIT_15 = col_double(),
## RIDIT_16 = col_double(),
## RIDIT_17 = col_double(),
## RIDIT_18 = col_double(),
## RIDIT_19 = col_double(),
## RIDIT_20 = col_double()
## )
## See spec(...) for full column specifications.
# Show the dimensions, column names and column types of the imported data
str(claimsData)
## Classes 'tbl_df', 'tbl' and 'data.frame': 502 obs. of 45 variables:
## $ Claim_Number : int 5001463 5004844 5005493 5007366 5011314 5016984 5021876 5023456 5024273 5029392 ...
## $ Policy_ID : int 364697 426960 426313 351603 423014 419258 415367 365027 346972 351192 ...
## $ CLAIM_AMOUNT : int 13463 1246 19883 16348 2477 37365 18926 12990 29493 5255 ...
## $ PAID_AMOUNT : int 13463 1246 19883 16348 2477 37365 18926 12990 29493 5255 ...
## $ CLAIM_SUSPICION_SCORE: int 3 3 3 3 2 3 3 3 3 2 ...
## $ IND_01 : int 1 1 1 1 5 1 1 2 2 2 ...
## $ IND_02 : int 1 2 1 1 5 2 1 1 1 2 ...
## $ IND_03 : int 1 1 4 2 3 1 1 3 4 3 ...
## $ IND_04 : int 4 4 1 2 1 1 5 5 1 1 ...
## $ IND_05 : int 5 1 1 1 1 5 1 4 2 1 ...
## $ IND_06 : int 3 1 1 2 1 2 2 5 4 1 ...
## $ IND_07 : int 3 5 1 2 5 5 2 1 2 2 ...
## $ IND_08 : int 1 1 2 3 1 2 2 2 1 1 ...
## $ IND_09 : int 2 2 3 1 3 1 1 5 1 1 ...
## $ IND_10 : int 2 1 5 2 2 1 1 3 1 1 ...
## $ IND_11 : int 1 1 4 1 1 2 4 3 1 1 ...
## $ IND_12 : int 3 5 5 1 1 1 2 3 1 3 ...
## $ IND_13 : int 5 1 1 1 2 1 4 2 2 1 ...
## $ IND_14 : int 2 2 1 1 1 1 1 2 2 2 ...
## $ IND_15 : int 1 2 4 5 1 1 3 1 1 1 ...
## $ IND_16 : int 2 1 1 4 1 1 1 2 1 3 ...
## $ IND_17 : int 4 1 1 2 1 5 1 1 1 2 ...
## $ IND_18 : int 1 5 1 1 1 1 1 1 2 1 ...
## $ IND_19 : int 2 1 3 2 1 1 1 4 3 2 ...
## $ IND_20 : int 3 2 1 1 1 2 1 1 2 1 ...
## $ RIDIT_01 : num -0.504 -0.504 -0.504 -0.504 0.92 ...
## $ RIDIT_02 : num -0.506 0.229 -0.506 -0.506 0.904 ...
## $ RIDIT_03 : num -0.47 -0.47 0.799 0.323 0.677 ...
## $ RIDIT_04 : num 0.789 0.789 -0.492 0.261 -0.492 ...
## $ RIDIT_05 : num 0.896 -0.498 -0.498 -0.498 -0.498 ...
## $ RIDIT_06 : num 0.651 -0.492 -0.492 0.279 -0.492 ...
## $ RIDIT_07 : num 0.592 0.914 -0.528 0.209 0.914 ...
## $ RIDIT_08 : num -0.504 -0.504 0.265 0.625 -0.504 ...
## $ RIDIT_09 : num 0.231 0.231 0.606 -0.514 0.606 ...
## $ RIDIT_10 : num 0.315 -0.456 0.916 0.315 0.315 ...
## $ RIDIT_11 : num -0.514 -0.514 0.743 -0.514 -0.514 ...
## $ RIDIT_12 : num 0.635 0.92 0.92 -0.486 -0.486 ...
## $ RIDIT_13 : num 0.93 -0.496 -0.496 -0.496 0.269 ...
## $ RIDIT_14 : num 0.237 0.237 -0.514 -0.514 -0.514 ...
## $ RIDIT_15 : num -0.492 0.257 0.797 0.93 -0.492 ...
## $ RIDIT_16 : num 0.263 -0.5 -0.5 0.805 -0.5 ...
## $ RIDIT_17 : num 0.755 -0.508 -0.508 0.241 -0.508 ...
## $ RIDIT_18 : num -0.498 0.898 -0.498 -0.498 -0.498 ...
## $ RIDIT_19 : num 0.301 -0.478 0.653 0.301 -0.478 ...
## $ RIDIT_20 : num 0.624 0.245 -0.508 -0.508 -0.508 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 45
## .. ..$ Claim_Number : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Policy_ID : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ CLAIM_AMOUNT : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ PAID_AMOUNT : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ CLAIM_SUSPICION_SCORE: list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_01 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_02 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_03 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_04 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_05 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_06 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_07 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_08 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_09 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_10 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_11 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_12 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_13 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_14 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_15 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_16 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_17 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_18 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_19 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ IND_20 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ RIDIT_01 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_02 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_03 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_04 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_05 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_06 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_07 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_08 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_09 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_10 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_11 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_12 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_13 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_14 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_15 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_16 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_17 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_18 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_19 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ RIDIT_20 : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
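Use bracket subsetting to keep only the 20 IND indicator variables (columns 6-25); this drops the identifier, amount and suspicion-score variables (columns 1-5) as well as the RIDIT columns (26-45). The left side of the comma refers to observations (rows), the right side refers to variables (columns):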
# Keep every observation and only the 20 IND indicator columns (6-25).
# Leaving the row index empty selects all rows, so the row count does not
# have to be hard-coded.
claimsData2 <- claimsData[, 6:25]
str(claimsData2)
## Classes 'tbl_df', 'tbl' and 'data.frame': 502 obs. of 20 variables:
## $ IND_01: int 1 1 1 1 5 1 1 2 2 2 ...
## $ IND_02: int 1 2 1 1 5 2 1 1 1 2 ...
## $ IND_03: int 1 1 4 2 3 1 1 3 4 3 ...
## $ IND_04: int 4 4 1 2 1 1 5 5 1 1 ...
## $ IND_05: int 5 1 1 1 1 5 1 4 2 1 ...
## $ IND_06: int 3 1 1 2 1 2 2 5 4 1 ...
## $ IND_07: int 3 5 1 2 5 5 2 1 2 2 ...
## $ IND_08: int 1 1 2 3 1 2 2 2 1 1 ...
## $ IND_09: int 2 2 3 1 3 1 1 5 1 1 ...
## $ IND_10: int 2 1 5 2 2 1 1 3 1 1 ...
## $ IND_11: int 1 1 4 1 1 2 4 3 1 1 ...
## $ IND_12: int 3 5 5 1 1 1 2 3 1 3 ...
## $ IND_13: int 5 1 1 1 2 1 4 2 2 1 ...
## $ IND_14: int 2 2 1 1 1 1 1 2 2 2 ...
## $ IND_15: int 1 2 4 5 1 1 3 1 1 1 ...
## $ IND_16: int 2 1 1 4 1 1 1 2 1 3 ...
## $ IND_17: int 4 1 1 2 1 5 1 1 1 2 ...
## $ IND_18: int 1 5 1 1 1 1 1 1 2 1 ...
## $ IND_19: int 2 1 3 2 1 1 1 4 3 2 ...
## $ IND_20: int 3 2 1 1 1 2 1 1 2 1 ...
Create a new variable rid that recomputes a few RIDIT scores by hand, so they can be compared against the RIDIT-transformed variables in the dataset:
# Hand-compute three RIDIT scores as a spot check against the RIDIT_* columns.
# For an ordered category the score computed here is
#   2 * (proportion in lower categories + 0.5 * proportion in this category) - 1
# Column labels are kept unchanged: "_01" is the first level of IND_01,
# "i2" the second level of IND_01, and "i5" the fifth level of IND_02.
rid <- local({
  # Denominator taken from the data instead of the literal 502
  n_obs <- nrow(claimsData2)
  # Tabulate each indicator once and convert the counts to proportions
  prop_01 <- table(claimsData2$IND_01) / n_obs
  prop_02 <- table(claimsData2$IND_02) / n_obs
  data.frame("RIDIT" = cbind(
    "_01" = 2 * (0 + 0.5 * prop_01[1]) - 1,
    "i2" = 2 * (prop_01[1] + 0.5 * prop_01[2]) - 1,
    "i5" = 2 * (prop_02[1] + prop_02[2] + prop_02[3] + prop_02[4] +
      0.5 * prop_02[5]) - 1
  ))
})
rid
## RIDIT._01 RIDIT.i2 RIDIT.i5
## 1 -0.5039841 0.2788845 0.9043825
Confirm that each RIDIT transformation results in appropriate directionality for the PRIDIT scoring method.
Run a principal component analysis on the RIDIT scores and plot the variance captured by each component:
# Isolate the RIDIT-transformed variables only (columns 26-45), keeping every
# row; the empty row index avoids hard-coding the number of observations:
myRidit <- claimsData[, 26:45]
# Perform principal component analysis. princomp() works on the covariance
# matrix by default, so the RIDIT scores are not rescaled:
myRiditPCA <- princomp(myRidit)
# Standard deviation and (cumulative) proportion of variance per component:
summary(myRiditPCA)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4
## Standard deviation 0.6204574 0.60989278 0.5935317 0.58715941
## Proportion of Variance 0.0676317 0.06534816 0.0618891 0.06056733
## Cumulative Proportion 0.0676317 0.13297987 0.1948690 0.25543630
## Comp.5 Comp.6 Comp.7 Comp.8
## Standard deviation 0.57802699 0.57255472 0.56235383 0.55467705
## Proportion of Variance 0.05869791 0.05759177 0.05555789 0.05405138
## Cumulative Proportion 0.31413421 0.37172598 0.42728386 0.48133524
## Comp.9 Comp.10 Comp.11 Comp.12
## Standard deviation 0.55002517 0.54144035 0.53693981 0.51613035
## Proportion of Variance 0.05314856 0.05150242 0.05064978 0.04679993
## Cumulative Proportion 0.53448380 0.58598622 0.63663601 0.68343594
## Comp.13 Comp.14 Comp.15 Comp.16
## Standard deviation 0.51239584 0.50182463 0.49239670 0.48169212
## Proportion of Variance 0.04612513 0.04424155 0.04259481 0.04076294
## Cumulative Proportion 0.72956106 0.77380261 0.81639742 0.85716036
## Comp.17 Comp.18 Comp.19 Comp.20
## Standard deviation 0.47209012 0.45779413 0.4401871 0.43226189
## Proportion of Variance 0.03915401 0.03681856 0.0340409 0.03282618
## Cumulative Proportion 0.89631437 0.93313293 0.9671738 1.00000000
# List the pieces of the princomp result (sdev, loadings, center, scale,
# n.obs, scores, call)
str(myRiditPCA)
## List of 7
## $ sdev : Named num [1:20] 0.62 0.61 0.594 0.587 0.578 ...
## ..- attr(*, "names")= chr [1:20] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ loadings: loadings [1:20, 1:20] 0.3819 -0.09 0.0291 -0.241 -0.0527 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:20] "RIDIT_01" "RIDIT_02" "RIDIT_03" "RIDIT_04" ...
## .. ..$ : chr [1:20] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ center : Named num [1:20] -1.95e-10 -3.35e-10 -1.43e-10 -2.27e-10 1.99e-11 ...
## ..- attr(*, "names")= chr [1:20] "RIDIT_01" "RIDIT_02" "RIDIT_03" "RIDIT_04" ...
## $ scale : Named num [1:20] 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "names")= chr [1:20] "RIDIT_01" "RIDIT_02" "RIDIT_03" "RIDIT_04" ...
## $ n.obs : int 502
## $ scores : num [1:502, 1:20] -0.547 0.227 -0.576 -0.42 1.103 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:20] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ call : language princomp(x = myRidit)
## - attr(*, "class")= chr "princomp"
# Plot the variance of each principal component. Note that plot() on a
# princomp object draws a scree plot, not a scatterplot matrix.
plot(myRiditPCA)
Prepare three graphical visualizations of the results of applying the PRIDIT scoring method to the claims file.
First, produce an R scree plot of the principal component analysis and report the IDs of the components that are needed to summarize the data.
# Bar plot of the variance captured by each principal component
screeplot(myRiditPCA)
Produce a variables factor map using the FactoMineR package and report which IND variables show strong correlation:
# Install FactoMineR from CRAN only when it is not already available.
# The repository URL needs the full "https://" scheme; with a single slash
# R reports "scheme not supported in URL" and nothing is installed.
if (!requireNamespace("FactoMineR", quietly = TRUE)) {
  install.packages("FactoMineR", repos = "https://cran.rstudio.com")
}
## Installing package into 'C:/Users/joshu/Documents/R/win-library/3.4'
## (as 'lib' is unspecified)
## Warning: unable to access index for repository https:/cran.rstudio.com/src/contrib:
## scheme not supported in URL 'https:/cran.rstudio.com/src/contrib/PACKAGES'
## Warning: package 'FactoMineR' is not available (for R version 3.4.3)
## Warning: unable to access index for repository https:/cran.rstudio.com/bin/windows/contrib/3.4:
## scheme not supported in URL 'https:/cran.rstudio.com/bin/windows/contrib/3.4/PACKAGES'
library(FactoMineR)
## Warning: package 'FactoMineR' was built under R version 3.4.4
# PCA on the 20 raw IND indicator columns, standardised to unit variance,
# keeping the first five dimensions and drawing the default graphs.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
res.pca <- PCA(claimsData[, 6:25], scale.unit = TRUE, ncp = 5, graph = TRUE)

# Convert the suspicion score to a factor so that it can be passed to PCA()
# as a supplementary qualitative variable.
claimsData$CS2 <- as.factor(claimsData$CLAIM_SUSPICION_SCORE)
# Columns 6-25 are the indicators; column 46 is the CS2 factor added above.
claimsData3 <- claimsData[, c(6:25, 46)]
# Column 21 of claimsData3 (CS2) is declared supplementary via quali.sup.
res.pca3 <- PCA(claimsData3, scale.unit = TRUE, ncp = 5, quali.sup = 21,
                graph = TRUE)
# Confidence ellipses around the categories of column 21 (CS2)
plotellipses(res.pca3, 21)
# Variables and categories most associated with the first two dimensions
dimdesc(res.pca3, axes = c(1, 2))
## $Dim.1
## $Dim.1$quanti
## correlation p.value
## IND_15 0.4137257 3.531264e-22
## IND_12 0.3883859 1.603809e-19
## IND_05 0.3700536 9.749207e-18
## IND_19 0.3241380 9.599476e-14
## IND_16 0.2808956 1.480954e-10
## IND_17 0.2492469 1.512722e-08
## IND_03 0.2222806 4.880961e-07
## IND_08 0.2188547 7.366473e-07
## IND_04 0.2127891 1.502255e-06
## IND_10 0.2101967 2.024415e-06
## IND_11 0.2036375 4.235536e-06
## IND_13 0.1656488 1.930413e-04
## IND_18 -0.2461701 2.297015e-08
## IND_07 -0.2487519 1.618487e-08
## IND_14 -0.2766059 2.873733e-10
## IND_01 -0.4571870 2.693492e-27
##
## $Dim.1$quali
## R2 p.value
## CS2 0.0877466 2.797239e-09
##
## $Dim.1$category
## Estimate p.value
## 4 0.9068265 1.171975e-06
## 5 1.7950570 4.567370e-04
## 1 -1.6909579 1.294066e-02
## 3 -0.2863728 9.835442e-03
##
##
## $Dim.2
## $Dim.2$quanti
## correlation p.value
## IND_09 0.5811923 1.086625e-46
## IND_19 0.4236294 2.793044e-23
## IND_17 0.4148013 2.691696e-22
## IND_16 0.2012068 5.534886e-06
## IND_07 0.1609693 2.932106e-04
## IND_18 0.1243927 5.255170e-03
## IND_01 0.1132449 1.111255e-02
## IND_02 0.1096183 1.399763e-02
## IND_13 0.0988602 2.676662e-02
## IND_15 -0.1883979 2.150101e-05
## IND_14 -0.1895045 1.918932e-05
## IND_20 -0.1955243 1.021693e-05
## IND_04 -0.3100314 1.204495e-12
## IND_08 -0.5061809 5.089833e-34