Notes:

-RIDIT Transformation converts categorical responses into numerical scores between -1 and 1.

-PCA helps reduce dimensionality and detect patterns in RIDIT scores or other numeric indicators.

-The plotellipses function visualizes the contribution of qualitative variables in PCA.

-This R Markdown file produces interactive visualizations and reports results in an organized document.

-PCA reduces dimensionality; first few components explain majority of variance.

-Using ‘quali.sup’ in PCA allows visualization of categorical outcomes without including them in variance computation.

-Scree plots help determine the number of principal components to retain.

1. Load Required Libraries

library(psych)        
library(corrplot)     
library(Hmisc)        
library(tidyverse)    
library(GGally)       
library(factoextra)   
library(FactoMineR)   
library(emuR)         

2. Load Dataset

filename <- "/Users/rameshbabuparamkusham/Documents/DAT 610/DAT 610 Auto Accident Personal Injury Claims.csv"
data <- read.csv(filename, header = TRUE)
head(data)
##   Claim_Number Policy_ID CLAIM_AMOUNT PAID_AMOUNT CLAIM_SUSPICION_SCORE IND_01
## 1           NA        NA                                             NA     NA
## 2      5001463    364697     $13,463      $3,646                      3      1
## 3           NA        NA                                             NA     NA
## 4      5004844    426960      $1,246        $594                      3      1
## 5           NA        NA                                             NA     NA
## 6      5005493    426313     $19,883     $15,138                      3      1
##   IND_02 IND_03 IND_04 IND_05 IND_06 IND_07 IND_08 IND_09 IND_10 IND_11 IND_12
## 1     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 2      1      1      4      5      3      3      1      2      2      1      3
## 3     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 4      2      1      4      1      1      5      1      2      1      1      5
## 5     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 6      1      4      1      1      1      1      2      3      5      4      5
##   IND_13 IND_14 IND_15 IND_16 IND_17 IND_18 IND_19 IND_20   RIDIT_01   RIDIT_02
## 1     NA     NA     NA     NA     NA     NA     NA     NA         NA         NA
## 2      5      2      1      2      4      1      2      3 -0.5039841 -0.5059761
## 3     NA     NA     NA     NA     NA     NA     NA     NA         NA         NA
## 4      1      2      2      1      1      5      1      2 -0.5039841  0.2290837
## 5     NA     NA     NA     NA     NA     NA     NA     NA         NA         NA
## 6      1      1      4      1      1      1      3      1 -0.5039841 -0.5059761
##     RIDIT_03   RIDIT_04   RIDIT_05   RIDIT_06   RIDIT_07   RIDIT_08  RIDIT_09
## 1         NA         NA         NA         NA         NA         NA        NA
## 2 -0.4701195  0.7888446  0.8964143  0.6513944  0.5916335 -0.5039841 0.2310757
## 3         NA         NA         NA         NA         NA         NA        NA
## 4 -0.4701195  0.7888446 -0.4980080 -0.4920319  0.9143426 -0.5039841 0.2310757
## 5         NA         NA         NA         NA         NA         NA        NA
## 6  0.7988048 -0.4920319 -0.4980080 -0.4920319 -0.5278884  0.2649402 0.6055777
##     RIDIT_10   RIDIT_11  RIDIT_12   RIDIT_13   RIDIT_14   RIDIT_15   RIDIT_16
## 1         NA         NA        NA         NA         NA         NA         NA
## 2  0.3147410 -0.5139442 0.6354582  0.9302789  0.2370518 -0.4920319  0.2629482
## 3         NA         NA        NA         NA         NA         NA         NA
## 4 -0.4561753 -0.5139442 0.9203187 -0.4960159  0.2370518  0.2569721 -0.5000000
## 5         NA         NA        NA         NA         NA         NA         NA
## 6  0.9163347  0.7430279 0.9203187 -0.4960159 -0.5139442  0.7968127 -0.5000000
##     RIDIT_17   RIDIT_18   RIDIT_19   RIDIT_20
## 1         NA         NA         NA         NA
## 2  0.7549801 -0.4980080  0.3007968  0.6235060
## 3         NA         NA         NA         NA
## 4 -0.5079681  0.8984064 -0.4780876  0.2450199
## 5         NA         NA         NA         NA
## 6 -0.5079681 -0.4980080  0.6533865 -0.5079681
head(data$PAID_AMOUNT)
## [1] ""         "$3,646 "  ""         "$594 "    ""         "$15,138 "
auto <- data
myData <- auto[1:502, 6:25]

3. RIDIT Transformation

-RIDIT (Relative to an Identified Distribution) converts ordered categorical variables into numerical scores, enabling comparison across categories.

# Compute RIDIT values for selected indicators
rid <- data.frame(
  "RIDIT" = cbind(
    "_01" = 2 * (0 + 0.5 * table(myData$IND_01)[1] / 502) - 1,
    "i2"  = 2 * (table(myData$IND_01)[1] / 502 + 0.5 * table(myData$IND_01)[2] / 502) - 1,
    "i5"  = 2 * (
      table(myData$IND_02)[1] / 502 +
      table(myData$IND_02)[2] / 502 +
      table(myData$IND_02)[3] / 502 +
      table(myData$IND_02)[4] / 502 +
      0.5 * table(myData$IND_02)[5] / 502
    ) - 1
  )
)
# Display RIDIT values
rid
##    RIDIT._01  RIDIT.i2    RIDIT.i5
## 1 -0.7669323 -0.376494 -0.04581673

4. Scatterplot of IND_01 vs RIDIT

attach(auto)
plot(auto[,7], auto[,27], 
     main = "Scatterplot of IND_01 vs RIDIT", 
     xlab = "IND_01", 
     ylab = "RIDIT_01", 
     pch = 19)

5. PCA on RIDIT-Transformed Variables

# Select RIDIT columns
myRidit <- auto[1:502, 26:45]

# Perform PCA using correlation matrix
myRiditPCA <- princomp(~ ., data = myRidit, cor = TRUE)

# Summary of PCA
summary(myRiditPCA)
## Importance of components:
##                            Comp.1     Comp.2     Comp.3    Comp.4     Comp.5
## Standard deviation     1.23981626 1.21132271 1.17313228 1.1170640 1.10801855
## Proportion of Variance 0.07685722 0.07336514 0.06881197 0.0623916 0.06138526
## Cumulative Proportion  0.07685722 0.15022235 0.21903432 0.2814259 0.34281117
##                           Comp.6     Comp.7     Comp.8     Comp.9    Comp.10
## Standard deviation     1.0652540 1.05981080 1.02337336 1.01406730 1.00265478
## Proportion of Variance 0.0567383 0.05615995 0.05236465 0.05141662 0.05026583
## Cumulative Proportion  0.3995495 0.45570942 0.50807408 0.55949070 0.60975653
##                           Comp.11    Comp.12    Comp.13    Comp.14    Comp.15
## Standard deviation     0.99272057 0.98810750 0.93065384 0.90732164 0.89409002
## Proportion of Variance 0.04927471 0.04881782 0.04330583 0.04116163 0.03996985
## Cumulative Proportion  0.65903124 0.70784906 0.75115489 0.79231651 0.83228636
##                           Comp.16    Comp.17    Comp.18    Comp.19    Comp.20
## Standard deviation     0.87224347 0.84693418 0.80929865 0.80064886 0.76168476
## Proportion of Variance 0.03804043 0.03586488 0.03274821 0.03205193 0.02900818
## Cumulative Proportion  0.87032680 0.90619167 0.93893989 0.97099182 1.00000000
# Scree plot to determine number of components
screeplot(myRiditPCA, type = "line")

6. PCA using FactoMineR

# PCA on original data columns
res.pca <- PCA(auto[,6:25], scale.unit = TRUE, ncp = 5, graph = TRUE)

7. PCA with Qualitative Supplementary Variable

-We transform CLAIM_SUSPICION_SCORE into a factor and include it as a qualitative supplementary variable.

# Transform claim suspicion score to factor
auto$CS2 <- as.factor(auto$CLAIM_SUSPICION_SCORE)

# Combine variables for PCA
myData2 <- auto[c(6:25, 46)]

# PCA including qualitative variable
res.pca3 <- PCA(myData2[1:21], scale.unit = TRUE, ncp = 5, quali.sup = 21, graph = TRUE)

# Scatterplots with ellipses
plotellipses(res.pca3, 21)