Import the dataset that we prepared in Module 2:

library(readr)
## Warning: package 'readr' was built under R version 3.4.4
# Read the prepared claims CSV into a tibble named claimsData.
# NOTE(review): the path is absolute and specific to one Windows user profile;
# it must be adjusted before this runs on any other machine.
claimsData <- read_csv("C:/Users/joshu/Desktop/preppedClaimsData.csv")
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   RIDIT_01 = col_double(),
##   RIDIT_02 = col_double(),
##   RIDIT_03 = col_double(),
##   RIDIT_04 = col_double(),
##   RIDIT_05 = col_double(),
##   RIDIT_06 = col_double(),
##   RIDIT_07 = col_double(),
##   RIDIT_08 = col_double(),
##   RIDIT_09 = col_double(),
##   RIDIT_10 = col_double(),
##   RIDIT_11 = col_double(),
##   RIDIT_12 = col_double(),
##   RIDIT_13 = col_double(),
##   RIDIT_14 = col_double(),
##   RIDIT_15 = col_double(),
##   RIDIT_16 = col_double(),
##   RIDIT_17 = col_double(),
##   RIDIT_18 = col_double(),
##   RIDIT_19 = col_double(),
##   RIDIT_20 = col_double()
## )
## See spec(...) for full column specifications.
# Print the structure of the imported data: dimensions, column names and types.
str(claimsData)
## Classes 'tbl_df', 'tbl' and 'data.frame':    502 obs. of  45 variables:
##  $ Claim_Number         : int  5001463 5004844 5005493 5007366 5011314 5016984 5021876 5023456 5024273 5029392 ...
##  $ Policy_ID            : int  364697 426960 426313 351603 423014 419258 415367 365027 346972 351192 ...
##  $ CLAIM_AMOUNT         : int  13463 1246 19883 16348 2477 37365 18926 12990 29493 5255 ...
##  $ PAID_AMOUNT          : int  13463 1246 19883 16348 2477 37365 18926 12990 29493 5255 ...
##  $ CLAIM_SUSPICION_SCORE: int  3 3 3 3 2 3 3 3 3 2 ...
##  $ IND_01               : int  1 1 1 1 5 1 1 2 2 2 ...
##  $ IND_02               : int  1 2 1 1 5 2 1 1 1 2 ...
##  $ IND_03               : int  1 1 4 2 3 1 1 3 4 3 ...
##  $ IND_04               : int  4 4 1 2 1 1 5 5 1 1 ...
##  $ IND_05               : int  5 1 1 1 1 5 1 4 2 1 ...
##  $ IND_06               : int  3 1 1 2 1 2 2 5 4 1 ...
##  $ IND_07               : int  3 5 1 2 5 5 2 1 2 2 ...
##  $ IND_08               : int  1 1 2 3 1 2 2 2 1 1 ...
##  $ IND_09               : int  2 2 3 1 3 1 1 5 1 1 ...
##  $ IND_10               : int  2 1 5 2 2 1 1 3 1 1 ...
##  $ IND_11               : int  1 1 4 1 1 2 4 3 1 1 ...
##  $ IND_12               : int  3 5 5 1 1 1 2 3 1 3 ...
##  $ IND_13               : int  5 1 1 1 2 1 4 2 2 1 ...
##  $ IND_14               : int  2 2 1 1 1 1 1 2 2 2 ...
##  $ IND_15               : int  1 2 4 5 1 1 3 1 1 1 ...
##  $ IND_16               : int  2 1 1 4 1 1 1 2 1 3 ...
##  $ IND_17               : int  4 1 1 2 1 5 1 1 1 2 ...
##  $ IND_18               : int  1 5 1 1 1 1 1 1 2 1 ...
##  $ IND_19               : int  2 1 3 2 1 1 1 4 3 2 ...
##  $ IND_20               : int  3 2 1 1 1 2 1 1 2 1 ...
##  $ RIDIT_01             : num  -0.504 -0.504 -0.504 -0.504 0.92 ...
##  $ RIDIT_02             : num  -0.506 0.229 -0.506 -0.506 0.904 ...
##  $ RIDIT_03             : num  -0.47 -0.47 0.799 0.323 0.677 ...
##  $ RIDIT_04             : num  0.789 0.789 -0.492 0.261 -0.492 ...
##  $ RIDIT_05             : num  0.896 -0.498 -0.498 -0.498 -0.498 ...
##  $ RIDIT_06             : num  0.651 -0.492 -0.492 0.279 -0.492 ...
##  $ RIDIT_07             : num  0.592 0.914 -0.528 0.209 0.914 ...
##  $ RIDIT_08             : num  -0.504 -0.504 0.265 0.625 -0.504 ...
##  $ RIDIT_09             : num  0.231 0.231 0.606 -0.514 0.606 ...
##  $ RIDIT_10             : num  0.315 -0.456 0.916 0.315 0.315 ...
##  $ RIDIT_11             : num  -0.514 -0.514 0.743 -0.514 -0.514 ...
##  $ RIDIT_12             : num  0.635 0.92 0.92 -0.486 -0.486 ...
##  $ RIDIT_13             : num  0.93 -0.496 -0.496 -0.496 0.269 ...
##  $ RIDIT_14             : num  0.237 0.237 -0.514 -0.514 -0.514 ...
##  $ RIDIT_15             : num  -0.492 0.257 0.797 0.93 -0.492 ...
##  $ RIDIT_16             : num  0.263 -0.5 -0.5 0.805 -0.5 ...
##  $ RIDIT_17             : num  0.755 -0.508 -0.508 0.241 -0.508 ...
##  $ RIDIT_18             : num  -0.498 0.898 -0.498 -0.498 -0.498 ...
##  $ RIDIT_19             : num  0.301 -0.478 0.653 0.301 -0.478 ...
##  $ RIDIT_20             : num  0.624 0.245 -0.508 -0.508 -0.508 ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 45
##   .. ..$ Claim_Number         : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ Policy_ID            : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ CLAIM_AMOUNT         : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ PAID_AMOUNT          : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ CLAIM_SUSPICION_SCORE: list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_01               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_02               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_03               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_04               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_05               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_06               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_07               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_08               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_09               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_10               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_11               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_12               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_13               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_14               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_15               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_16               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_17               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_18               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_19               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ IND_20               : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ RIDIT_01             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_02             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_03             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_04             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_05             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_06             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_07             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_08             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_09             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_10             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_11             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_12             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_13             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_14             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_15             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_16             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_17             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_18             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_19             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ RIDIT_20             : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"

Use bracket subsetting to keep only the 20 IND variables (columns 6-25); this drops variables 1-5 as well as the RIDIT columns. The left side of the comma refers to observations (rows), the right side refers to variables (columns):

# Keep every observation and only the 20 IND indicator columns (positions
# 6-25). Leaving the row index empty selects all rows, so the code no longer
# depends on the file containing exactly 502 claims.
claimsData2 <- claimsData[, 6:25]

# Confirm that only IND_01 ... IND_20 remain.
str(claimsData2)
## Classes 'tbl_df', 'tbl' and 'data.frame':    502 obs. of  20 variables:
##  $ IND_01: int  1 1 1 1 5 1 1 2 2 2 ...
##  $ IND_02: int  1 2 1 1 5 2 1 1 1 2 ...
##  $ IND_03: int  1 1 4 2 3 1 1 3 4 3 ...
##  $ IND_04: int  4 4 1 2 1 1 5 5 1 1 ...
##  $ IND_05: int  5 1 1 1 1 5 1 4 2 1 ...
##  $ IND_06: int  3 1 1 2 1 2 2 5 4 1 ...
##  $ IND_07: int  3 5 1 2 5 5 2 1 2 2 ...
##  $ IND_08: int  1 1 2 3 1 2 2 2 1 1 ...
##  $ IND_09: int  2 2 3 1 3 1 1 5 1 1 ...
##  $ IND_10: int  2 1 5 2 2 1 1 3 1 1 ...
##  $ IND_11: int  1 1 4 1 1 2 4 3 1 1 ...
##  $ IND_12: int  3 5 5 1 1 1 2 3 1 3 ...
##  $ IND_13: int  5 1 1 1 2 1 4 2 2 1 ...
##  $ IND_14: int  2 2 1 1 1 1 1 2 2 2 ...
##  $ IND_15: int  1 2 4 5 1 1 3 1 1 1 ...
##  $ IND_16: int  2 1 1 4 1 1 1 2 1 3 ...
##  $ IND_17: int  4 1 1 2 1 5 1 1 1 2 ...
##  $ IND_18: int  1 5 1 1 1 1 1 1 2 1 ...
##  $ IND_19: int  2 1 3 2 1 1 1 4 3 2 ...
##  $ IND_20: int  3 2 1 1 1 2 1 1 2 1 ...

Create a new variable rid to inspect RIDIT-transformed variables:

# Recompute three RIDIT scores by hand to check them against the stored
# RIDIT_* columns. For category k the score is
#   2 * (share of claims in categories below k + half the share in k) - 1.
# Shares are derived from the actual row count instead of a hard-coded 502,
# and each frequency table is computed only once.
n_claims <- nrow(claimsData2)
share_01 <- table(claimsData2$IND_01) / n_claims
share_02 <- table(claimsData2$IND_02) / n_claims

rid <- data.frame("RIDIT" = cbind(
  # IND_01, category 1: nothing ranks below it
  "_01" = 2 * (0 + 0.5 * share_01[1]) - 1,
  # IND_01, category 2: all of category 1 plus half of category 2
  "i2" = 2 * (share_01[1] + 0.5 * share_01[2]) - 1,
  # IND_02, category 5: categories 1-4 plus half of category 5
  "i5" = 2 * (share_02[1] + share_02[2] + share_02[3] + share_02[4] +
    0.5 * share_02[5]) - 1
))

rid
##    RIDIT._01  RIDIT.i2  RIDIT.i5
## 1 -0.5039841 0.2788845 0.9043825

Confirm that each RIDIT transformation results in appropriate directionality for the PRIDIT scoring method.

Generate scatterplot matrix of variance versus RIDIT score:

# Isolate the RIDIT-transformed variables only (columns 26-45). The empty row
# index keeps all observations without hard-coding the 502-row count.
myRidit <- claimsData[, 26:45]

# Perform principal component analysis on the RIDIT scores.
myRiditPCA <- princomp(myRidit)

# Report standard deviation and (cumulative) proportion of variance
# for each component.
summary(myRiditPCA)
## Importance of components:
##                           Comp.1     Comp.2    Comp.3     Comp.4
## Standard deviation     0.6204574 0.60989278 0.5935317 0.58715941
## Proportion of Variance 0.0676317 0.06534816 0.0618891 0.06056733
## Cumulative Proportion  0.0676317 0.13297987 0.1948690 0.25543630
##                            Comp.5     Comp.6     Comp.7     Comp.8
## Standard deviation     0.57802699 0.57255472 0.56235383 0.55467705
## Proportion of Variance 0.05869791 0.05759177 0.05555789 0.05405138
## Cumulative Proportion  0.31413421 0.37172598 0.42728386 0.48133524
##                            Comp.9    Comp.10    Comp.11    Comp.12
## Standard deviation     0.55002517 0.54144035 0.53693981 0.51613035
## Proportion of Variance 0.05314856 0.05150242 0.05064978 0.04679993
## Cumulative Proportion  0.53448380 0.58598622 0.63663601 0.68343594
##                           Comp.13    Comp.14    Comp.15    Comp.16
## Standard deviation     0.51239584 0.50182463 0.49239670 0.48169212
## Proportion of Variance 0.04612513 0.04424155 0.04259481 0.04076294
## Cumulative Proportion  0.72956106 0.77380261 0.81639742 0.85716036
##                           Comp.17    Comp.18   Comp.19    Comp.20
## Standard deviation     0.47209012 0.45779413 0.4401871 0.43226189
## Proportion of Variance 0.03915401 0.03681856 0.0340409 0.03282618
## Cumulative Proportion  0.89631437 0.93313293 0.9671738 1.00000000
# Inspect the elements stored in the princomp result (sdev, loadings, center,
# scale, n.obs, scores, call).
str(myRiditPCA)
## List of 7
##  $ sdev    : Named num [1:20] 0.62 0.61 0.594 0.587 0.578 ...
##   ..- attr(*, "names")= chr [1:20] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
##  $ loadings: loadings [1:20, 1:20] 0.3819 -0.09 0.0291 -0.241 -0.0527 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:20] "RIDIT_01" "RIDIT_02" "RIDIT_03" "RIDIT_04" ...
##   .. ..$ : chr [1:20] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
##  $ center  : Named num [1:20] -1.95e-10 -3.35e-10 -1.43e-10 -2.27e-10 1.99e-11 ...
##   ..- attr(*, "names")= chr [1:20] "RIDIT_01" "RIDIT_02" "RIDIT_03" "RIDIT_04" ...
##  $ scale   : Named num [1:20] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "names")= chr [1:20] "RIDIT_01" "RIDIT_02" "RIDIT_03" "RIDIT_04" ...
##  $ n.obs   : int 502
##  $ scores  : num [1:502, 1:20] -0.547 0.227 -0.576 -0.42 1.103 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:20] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
##  $ call    : language princomp(x = myRidit)
##  - attr(*, "class")= chr "princomp"
# Plot the PCA result.
# NOTE(review): plot() on a princomp object draws a scree plot (variance of
# each component), not a scatterplot matrix. If a scatterplot matrix is
# required, something like pairs() on the component scores would be needed;
# confirm the intended figure.

plot(myRiditPCA)

Prepare three graphical visualizations of the results of applying the PRIDIT scoring method to the claims file.

First, produce a scree plot of the principal component analysis in R and report which principal components are needed to summarize the data.

# Scree plot of the component variances from the RIDIT principal component
# analysis.
screeplot(myRiditPCA)

Produce a variables factor map using the FactoMineR package and report which IND variables show strong correlation:

# Install FactoMineR only when it is not already available, so the script does
# not reinstall the package on every run. The repository URL needs the full
# "https://" scheme; with a single slash R reports
# "scheme not supported in URL" and nothing is installed.
if (!requireNamespace("FactoMineR", quietly = TRUE)) {
  install.packages("FactoMineR", repos = "https://cran.rstudio.com")
}
## Installing package into 'C:/Users/joshu/Documents/R/win-library/3.4'
## (as 'lib' is unspecified)
## Warning: unable to access index for repository https:/cran.rstudio.com/src/contrib:
##   scheme not supported in URL 'https:/cran.rstudio.com/src/contrib/PACKAGES'
## Warning: package 'FactoMineR' is not available (for R version 3.4.3)
## Warning: unable to access index for repository https:/cran.rstudio.com/bin/windows/contrib/3.4:
##   scheme not supported in URL 'https:/cran.rstudio.com/bin/windows/contrib/3.4/PACKAGES'
library(FactoMineR)
## Warning: package 'FactoMineR' was built under R version 3.4.4
# PCA on the 20 IND indicator columns, scaled to unit variance, keeping five
# dimensions and drawing the default graphs (including the variables factor
# map).
res.pca <- PCA(claimsData[, 6:25], scale.unit = TRUE, ncp = 5, graph = TRUE)

# Store the claim suspicion score as a factor so it can be used as a
# categorical supplementary variable.
claimsData$CS2 <- as.factor(claimsData$CLAIM_SUSPICION_SCORE)

# IND columns plus the new factor. CS2 is selected by name rather than by its
# position (46), so the selection does not depend on how many columns precede
# it.
claimsData3 <- claimsData[c(names(claimsData)[6:25], "CS2")]
cs2_index <- which(names(claimsData3) == "CS2")

# Same PCA, with CS2 as a supplementary qualitative variable: it does not
# contribute to the axes but is projected onto them.
res.pca3 <- PCA(
  claimsData3,
  scale.unit = TRUE,
  ncp = 5,
  quali.sup = cs2_index,
  graph = TRUE
)

# Confidence ellipses around the categories of CS2.
plotellipses(res.pca3, cs2_index)

# Variables and categories most associated with the first two dimensions.
dimdesc(res.pca3, axes = c(1, 2))
## $Dim.1
## $Dim.1$quanti
##        correlation      p.value
## IND_15   0.4137257 3.531264e-22
## IND_12   0.3883859 1.603809e-19
## IND_05   0.3700536 9.749207e-18
## IND_19   0.3241380 9.599476e-14
## IND_16   0.2808956 1.480954e-10
## IND_17   0.2492469 1.512722e-08
## IND_03   0.2222806 4.880961e-07
## IND_08   0.2188547 7.366473e-07
## IND_04   0.2127891 1.502255e-06
## IND_10   0.2101967 2.024415e-06
## IND_11   0.2036375 4.235536e-06
## IND_13   0.1656488 1.930413e-04
## IND_18  -0.2461701 2.297015e-08
## IND_07  -0.2487519 1.618487e-08
## IND_14  -0.2766059 2.873733e-10
## IND_01  -0.4571870 2.693492e-27
## 
## $Dim.1$quali
##            R2      p.value
## CS2 0.0877466 2.797239e-09
## 
## $Dim.1$category
##     Estimate      p.value
## 4  0.9068265 1.171975e-06
## 5  1.7950570 4.567370e-04
## 1 -1.6909579 1.294066e-02
## 3 -0.2863728 9.835442e-03
## 
## 
## $Dim.2
## $Dim.2$quanti
##        correlation      p.value
## IND_09   0.5811923 1.086625e-46
## IND_19   0.4236294 2.793044e-23
## IND_17   0.4148013 2.691696e-22
## IND_16   0.2012068 5.534886e-06
## IND_07   0.1609693 2.932106e-04
## IND_18   0.1243927 5.255170e-03
## IND_01   0.1132449 1.111255e-02
## IND_02   0.1096183 1.399763e-02
## IND_13   0.0988602 2.676662e-02
## IND_15  -0.1883979 2.150101e-05
## IND_14  -0.1895045 1.918932e-05
## IND_20  -0.1955243 1.021693e-05
## IND_04  -0.3100314 1.204495e-12
## IND_08  -0.5061809 5.089833e-34