1. Load Library

library(readr)
library(dplyr)
library(ggplot2)
library(corrplot)
library(psych)
library(FactoMineR)
library(factoextra)

2. Load Dataset

# Read the raw CSV into a tibble with readr.
# NOTE(review): this is an absolute, machine-specific path; the script will
# not run on another machine without editing it.
finance_data <- read_csv("C:/Users/LENOVO/Downloads/5k.csv")
## Rows: 5000 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (17): Occupation, Risk Tolerance, Investment Goals, Income Level, Addres...
## dbl  (2): Age, Loan Term (Months)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Standardize the column names into syntactically valid R names
# (spaces and symbols become dots).
finance_data <- setNames(finance_data, make.names(names(finance_data)))

# Preview the first rows.
head(finance_data)
## # A tibble: 6 × 19
##     Age Occupation Risk.Tolerance Investment.Goals    Income.Level Address      
##   <dbl> <chr>      <chr>          <chr>               <chr>        <chr>        
## 1    40 Lawyer     High           Wealth Preservation $46044.94    "7168 Moody …
## 2    30 Teacher    Low            Wealth Preservation $57169.50    "50001 Hecto…
## 3    37 Teacher    Low            Speculation         $71760.86    "997 James I…
## 4    27 Student    Medium         Speculation         $-25488.15   "1607 Joshua…
## 5    36 Engineer   Low            Income Generation   $106777.95   "96690 Campb…
## 6    77 Doctor     Low            Income Generation   $59157.22    "23020 Jacks…
## # ℹ 13 more variables: Account.Balance <chr>, Deposits <chr>,
## #   Withdrawals <chr>, Transfers <chr>, International.Transfers <chr>,
## #   Investments <chr>, Loan.Amount <chr>, Loan.Purpose <chr>,
## #   Employment.Status <chr>, Loan.Term..Months. <dbl>, Interest.Rate <chr>,
## #   Loan.Status <chr>, Transaction.Description <chr>
# Inspect the column types as parsed from the CSV.
str(finance_data)
## spc_tbl_ [5,000 × 19] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Age                    : num [1:5000] 40 30 37 27 36 77 70 45 56 61 ...
##  $ Occupation             : chr [1:5000] "Lawyer" "Teacher" "Teacher" "Student" ...
##  $ Risk.Tolerance         : chr [1:5000] "High" "Low" "Low" "Medium" ...
##  $ Investment.Goals       : chr [1:5000] "Wealth Preservation" "Wealth Preservation" "Speculation" "Speculation" ...
##  $ Income.Level           : chr [1:5000] "$46044.94" "$57169.50" "$71760.86" "$-25488.15" ...
##  $ Address                : chr [1:5000] "7168 Moody Meadow\nHernandezshire, PW 06016" "50001 Hector Square\nWest Luisfurt, MA 51935" "997 James Isle\nNorth Rebeccafurt, RI 13366" "1607 Joshua Camp Apt. 634\nConleymouth, CT 66479" ...
##  $ Account.Balance        : chr [1:5000] "$44653.26" "$29175.47" "$86141.59" "$1000.00" ...
##  $ Deposits               : chr [1:5000] "$9156.01" "$5933.22" "$22583.11" "$299.47" ...
##  $ Withdrawals            : chr [1:5000] "$9327.70" "$8671.60" "$16468.58" "$289.09" ...
##  $ Transfers              : chr [1:5000] "$3647.92" "$6729.86" "$6032.53" "$109.83" ...
##  $ International.Transfers: chr [1:5000] "$82.80" "$819.69" "$1526.33" "$3.51" ...
##  $ Investments            : chr [1:5000] "$8729.30" "$4545.18" "$8251.45" "$195.73" ...
##  $ Loan.Amount            : chr [1:5000] "$27010.93" "$31266.97" "$41260.58" "$5000.00" ...
##  $ Loan.Purpose           : chr [1:5000] "Medical Expenses" "Auto Purchase" "Auto Purchase" "Small Business" ...
##  $ Employment.Status      : chr [1:5000] "Retired" "Retired" "Employed" "Retired" ...
##  $ Loan.Term..Months.     : num [1:5000] 36 36 12 60 24 24 24 48 48 12 ...
##  $ Interest.Rate          : chr [1:5000] "11.94%" "8.08%" "13.07%" "6.73%" ...
##  $ Loan.Status            : chr [1:5000] "pending" "approved" "pending" "approved" ...
##  $ Transaction.Description: chr [1:5000] "Electronics transaction of $706.18 at Sanders, Roberts and Hughes" "Transaction at Evans-Smith for $2250.03" "Purchase at Taylor-Gutierrez for $615.85 on 2024-02-21" "Travel transaction of $4852.39 at Jones-Russell" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Age = col_double(),
##   ..   Occupation = col_character(),
##   ..   `Risk Tolerance` = col_character(),
##   ..   `Investment Goals` = col_character(),
##   ..   `Income Level` = col_character(),
##   ..   Address = col_character(),
##   ..   `Account Balance` = col_character(),
##   ..   Deposits = col_character(),
##   ..   Withdrawals = col_character(),
##   ..   Transfers = col_character(),
##   ..   `International Transfers` = col_character(),
##   ..   Investments = col_character(),
##   ..   `Loan Amount` = col_character(),
##   ..   `Loan Purpose` = col_character(),
##   ..   `Employment Status` = col_character(),
##   ..   `Loan Term (Months)` = col_double(),
##   ..   `Interest Rate` = col_character(),
##   ..   `Loan Status` = col_character(),
##   ..   `Transaction Description` = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

3. Data Preprocessing

3.1 Pembersihan Format Angka

# Columns that hold monetary amounts stored as text.
currency_columns <- c(
  "Income.Level", "Account.Balance", "Deposits", "Withdrawals",
  "Transfers", "International.Transfers", "Investments", "Loan.Amount"
)

# Strip the dollar sign and any commas, then convert the text to numbers.
for (col in currency_columns) {
  finance_data[[col]] <- as.numeric(
    gsub(",", "", gsub("\\$", "", finance_data[[col]]))
  )
}

# Strip the percent sign and express the rate as a fraction (11.94% -> 0.1194).
finance_data$Interest.Rate <-
  as.numeric(gsub("%", "", finance_data$Interest.Rate)) / 100

# Confirm the converted column types.
str(finance_data)
## spc_tbl_ [5,000 × 19] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Age                    : num [1:5000] 40 30 37 27 36 77 70 45 56 61 ...
##  $ Occupation             : chr [1:5000] "Lawyer" "Teacher" "Teacher" "Student" ...
##  $ Risk.Tolerance         : chr [1:5000] "High" "Low" "Low" "Medium" ...
##  $ Investment.Goals       : chr [1:5000] "Wealth Preservation" "Wealth Preservation" "Speculation" "Speculation" ...
##  $ Income.Level           : num [1:5000] 46045 57170 71761 -25488 106778 ...
##  $ Address                : chr [1:5000] "7168 Moody Meadow\nHernandezshire, PW 06016" "50001 Hector Square\nWest Luisfurt, MA 51935" "997 James Isle\nNorth Rebeccafurt, RI 13366" "1607 Joshua Camp Apt. 634\nConleymouth, CT 66479" ...
##  $ Account.Balance        : num [1:5000] 44653 29175 86142 1000 77919 ...
##  $ Deposits               : num [1:5000] 9156 5933 22583 299 13853 ...
##  $ Withdrawals            : num [1:5000] 9328 8672 16469 289 22699 ...
##  $ Transfers              : num [1:5000] 3648 6730 6033 110 12189 ...
##  $ International.Transfers: num [1:5000] 82.8 819.69 1526.33 3.51 2152.51 ...
##  $ Investments            : num [1:5000] 8729 4545 8251 196 9725 ...
##  $ Loan.Amount            : num [1:5000] 27011 31267 41261 5000 50000 ...
##  $ Loan.Purpose           : chr [1:5000] "Medical Expenses" "Auto Purchase" "Auto Purchase" "Small Business" ...
##  $ Employment.Status      : chr [1:5000] "Retired" "Retired" "Employed" "Retired" ...
##  $ Loan.Term..Months.     : num [1:5000] 36 36 12 60 24 24 24 48 48 12 ...
##  $ Interest.Rate          : num [1:5000] 0.1194 0.0808 0.1307 0.0673 0.106 ...
##  $ Loan.Status            : chr [1:5000] "pending" "approved" "pending" "approved" ...
##  $ Transaction.Description: chr [1:5000] "Electronics transaction of $706.18 at Sanders, Roberts and Hughes" "Transaction at Evans-Smith for $2250.03" "Purchase at Taylor-Gutierrez for $615.85 on 2024-02-21" "Travel transaction of $4852.39 at Jones-Russell" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Age = col_double(),
##   ..   Occupation = col_character(),
##   ..   `Risk Tolerance` = col_character(),
##   ..   `Investment Goals` = col_character(),
##   ..   `Income Level` = col_character(),
##   ..   Address = col_character(),
##   ..   `Account Balance` = col_character(),
##   ..   Deposits = col_character(),
##   ..   Withdrawals = col_character(),
##   ..   Transfers = col_character(),
##   ..   `International Transfers` = col_character(),
##   ..   Investments = col_character(),
##   ..   `Loan Amount` = col_character(),
##   ..   `Loan Purpose` = col_character(),
##   ..   `Employment Status` = col_character(),
##   ..   `Loan Term (Months)` = col_double(),
##   ..   `Interest Rate` = col_character(),
##   ..   `Loan Status` = col_character(),
##   ..   `Transaction Description` = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

3.2 Menghapus Kolom Tidak Digunakan

# Drop the columns that are not used in the rest of the analysis.
finance_data <- select(
  finance_data,
  -c(Occupation, Investment.Goals, Address, Transaction.Description)
)

# Preview the reduced table.
head(finance_data)
## # A tibble: 6 × 15
##     Age Risk.Tolerance Income.Level Account.Balance Deposits Withdrawals
##   <dbl> <chr>                 <dbl>           <dbl>    <dbl>       <dbl>
## 1    40 High                 46045.          44653.    9156.       9328.
## 2    30 Low                  57170.          29175.    5933.       8672.
## 3    37 Low                  71761.          86142.   22583.      16469.
## 4    27 Medium              -25488.           1000      299.        289.
## 5    36 Low                 106778.          77919.   13853.      22699.
## 6    77 Low                  59157.          61521.   22053.      16970.
## # ℹ 9 more variables: Transfers <dbl>, International.Transfers <dbl>,
## #   Investments <dbl>, Loan.Amount <dbl>, Loan.Purpose <chr>,
## #   Employment.Status <chr>, Loan.Term..Months. <dbl>, Interest.Rate <dbl>,
## #   Loan.Status <chr>

4. Missing Value dan Duplikasi

# Count missing values in each column.
colSums(is.na(finance_data))
##                     Age          Risk.Tolerance            Income.Level 
##                       0                       0                       0 
##         Account.Balance                Deposits             Withdrawals 
##                       0                       0                       0 
##               Transfers International.Transfers             Investments 
##                       0                       0                       0 
##             Loan.Amount            Loan.Purpose       Employment.Status 
##                       0                       0                       0 
##      Loan.Term..Months.           Interest.Rate             Loan.Status 
##                       0                       0                       0
# Count rows that are exact duplicates of an earlier row.
sum(duplicated(finance_data))
## [1] 0

5. Mengatasi Outlier dengan Metode IQR

# Numeric variables that are screened for outliers and plotted below.
numerical_columns <- c(
  "Age", "Income.Level", "Account.Balance", "Deposits", "Withdrawals",
  "Transfers", "International.Transfers", "Investments", "Loan.Amount",
  "Loan.Term..Months.", "Interest.Rate"
)

#' Remove rows whose value in one column is an IQR outlier
#'
#' Keeps the rows where `data[[column]]` lies inside
#' `[Q1 - multiplier * IQR, Q3 + multiplier * IQR]`. Rows whose value is `NA`
#' are dropped, exactly as a filter on the same condition would drop them.
#'
#' @param data A data frame or tibble.
#' @param column Name (single string) of a numeric column in `data`.
#' @param multiplier Fence width in IQR units. Defaults to 1.5.
#' @return `data` restricted to the rows inside the fences, all columns kept.
remove_outliers_iqr <- function(data, column, multiplier = 1.5) {
  if (!is.character(column) || length(column) != 1L ||
        !column %in% names(data)) {
    stop("`column` must be the name of a single column in `data`.",
         call. = FALSE)
  }

  values <- data[[column]]
  if (!is.numeric(values)) {
    stop("Column `", column, "` must be numeric.", call. = FALSE)
  }

  # Both quartiles in one call; `spread` avoids shadowing stats::IQR().
  quartiles <- stats::quantile(
    values,
    probs = c(0.25, 0.75),
    na.rm = TRUE,
    names = FALSE
  )
  spread <- quartiles[2] - quartiles[1]

  lower_bound <- quartiles[1] - multiplier * spread
  upper_bound <- quartiles[2] + multiplier * spread

  # `!is.na()` makes the mask strictly TRUE/FALSE so NA rows are dropped
  # rather than producing NA-filled rows under base subsetting.
  keep <- !is.na(values) & values >= lower_bound & values <= upper_bound
  data[keep, , drop = FALSE]
}

# Apply the IQR filter one column at a time; each pass works on the rows
# that survived the previous passes.
finance_data <- Reduce(
  remove_outliers_iqr,
  numerical_columns,
  finance_data
)

# Rows and columns remaining after outlier removal.
dim(finance_data)
## [1] 4543   15

Visualisasi Boxplot

# Draw one boxplot per numeric column on the filtered data.
for (col in numerical_columns) {
  print(
    ggplot(finance_data, aes(y = .data[[col]])) +
      theme_minimal() +
      geom_boxplot() +
      labs(title = paste("Boxplot of", col))
  )
}

6. Statistik Deskriptif

# Keep only the numeric columns for the descriptive summary.
data_numeric <- select(finance_data, where(is.numeric))

# Summary statistics per variable (psych::describe).
describe(data_numeric)
##                         vars    n     mean       sd   median  trimmed      mad
## Age                        1 4543    51.75    19.56    52.00    51.82    25.20
## Income.Level               2 4543 69032.56 29050.63 68605.30 68926.58 29949.56
## Account.Balance            3 4543 58729.19 32853.32 57378.22 59886.26 47846.11
## Deposits                   4 4543 17551.08 12488.10 14623.09 16310.36 12424.93
## Withdrawals                5 4543 10361.58  7569.72  8511.19  9590.10  7601.88
## Transfers                  6 4543  8462.06  5873.72  7233.53  7912.89  6126.04
## International.Transfers    7 4543  1153.67  1051.45   817.95  1002.50   897.17
## Investments                8 4543  8317.43  5819.26  6984.33  7762.11  5775.76
## Loan.Amount                9 4543 37561.36 15201.27 47406.86 39682.80  3844.59
## Loan.Term..Months.        10 4543    36.24    17.06    36.00    36.29    17.79
## Interest.Rate             11 4543     0.11     0.02     0.11     0.11     0.02
##                               min       max     range  skew kurtosis     se
## Age                         18.00     85.00     67.00 -0.02    -1.20   0.29
## Income.Level            -10338.99 151827.63 162166.62  0.04    -0.33 431.01
## Account.Balance           1000.00 100000.00  99000.00 -0.07    -1.41 487.43
## Deposits                   109.34  49992.09  49882.75  0.74    -0.35 185.28
## Withdrawals                 50.50  29996.72  29946.22  0.75    -0.37 112.31
## Transfers                   51.99  24991.81  24939.82  0.71    -0.30  87.14
## International.Transfers      0.01   4378.21   4378.20  1.09     0.39  15.60
## Investments                 51.22  26656.02  26604.80  0.78    -0.06  86.34
## Loan.Amount               5000.00  50000.00  45000.00 -0.81    -0.85 225.53
## Loan.Term..Months.          12.00     60.00     48.00 -0.01    -1.31   0.25
## Interest.Rate                0.05      0.15      0.10 -0.30    -0.46   0.00

7. Visualisasi Distribusi

# Draw a 10-bin histogram for every numeric column.
for (col in names(data_numeric)) {
  print(
    ggplot(data_numeric, aes(x = .data[[col]])) +
      theme_minimal() +
      geom_histogram(bins = 10) +
      labs(title = paste("Distribusi", col))
  )
}

8. Analisis Korelasi

# Keep only the numeric columns. vapply() always returns a logical vector,
# whereas sapply() can change its return type on unusual input.
data_numeric <- finance_data[, vapply(finance_data, is.numeric, logical(1))]

# Make sure every column name is syntactically valid and unique.
colnames(data_numeric) <- make.names(colnames(data_numeric), unique = TRUE)

# Build the correlation matrix from the rows without missing values.
corr_matrix <- cor(data_numeric, use = "complete.obs")
corr_matrix
##                                   Age Income.Level Account.Balance     Deposits
## Age                      1.0000000000 -0.015117950    0.0005111657 -0.001758733
## Income.Level            -0.0151179497  1.000000000    0.5230815794  0.412883956
## Account.Balance          0.0005111657  0.523081579    1.0000000000  0.783699871
## Deposits                -0.0017587329  0.412883956    0.7836998713  1.000000000
## Withdrawals              0.0006842343  0.413480687    0.7680698227  0.604359799
## Transfers                0.0184533315  0.396838695    0.7707323895  0.604959884
## International.Transfers  0.0113596705  0.282507182    0.5298357848  0.401860972
## Investments             -0.0171003557  0.354797465    0.6971569979  0.555109298
## Loan.Amount             -0.0098203824  0.485774935    0.2849468803  0.228226333
## Loan.Term..Months.       0.0021245599 -0.002061079   -0.0238092117 -0.023958973
## Interest.Rate           -0.0179645419  0.319932567    0.1908740510  0.143217776
##                           Withdrawals   Transfers International.Transfers
## Age                      0.0006842343  0.01845333             0.011359671
## Income.Level             0.4134806872  0.39683870             0.282507182
## Account.Balance          0.7680698227  0.77073239             0.529835785
## Deposits                 0.6043597995  0.60495988             0.401860972
## Withdrawals              1.0000000000  0.58621658             0.400282653
## Transfers                0.5862165799  1.00000000             0.631149189
## International.Transfers  0.4002826534  0.63114919             1.000000000
## Investments              0.5410310215  0.53656419             0.374822796
## Loan.Amount              0.2360602592  0.21991683             0.175022788
## Loan.Term..Months.      -0.0234913521 -0.01112869            -0.005986568
## Interest.Rate            0.1511950911  0.14724767             0.123244216
##                         Investments  Loan.Amount Loan.Term..Months.
## Age                     -0.01710036 -0.009820382        0.002124560
## Income.Level             0.35479746  0.485774935       -0.002061079
## Account.Balance          0.69715700  0.284946880       -0.023809212
## Deposits                 0.55510930  0.228226333       -0.023958973
## Withdrawals              0.54103102  0.236060259       -0.023491352
## Transfers                0.53656419  0.219916828       -0.011128690
## International.Transfers  0.37482280  0.175022788       -0.005986568
## Investments              1.00000000  0.209617025       -0.024017817
## Loan.Amount              0.20961703  1.000000000        0.017708758
## Loan.Term..Months.      -0.02401782  0.017708758        1.000000000
## Interest.Rate            0.13895187  0.684915534        0.009669201
##                         Interest.Rate
## Age                      -0.017964542
## Income.Level              0.319932567
## Account.Balance           0.190874051
## Deposits                  0.143217776
## Withdrawals               0.151195091
## Transfers                 0.147247667
## International.Transfers   0.123244216
## Investments               0.138951873
## Loan.Amount               0.684915534
## Loan.Term..Months.        0.009669201
## Interest.Rate             1.000000000
# Determinant of the correlation matrix.
det_corr <- det(corr_matrix)
print(det_corr)
## [1] 0.005740848

9. Uji Asumsi PCA dan FA

1. Correlation

# Correlation matrix of the numeric variables (cor() defaults to Pearson).
cor(data_numeric)
##                                   Age Income.Level Account.Balance     Deposits
## Age                      1.0000000000 -0.015117950    0.0005111657 -0.001758733
## Income.Level            -0.0151179497  1.000000000    0.5230815794  0.412883956
## Account.Balance          0.0005111657  0.523081579    1.0000000000  0.783699871
## Deposits                -0.0017587329  0.412883956    0.7836998713  1.000000000
## Withdrawals              0.0006842343  0.413480687    0.7680698227  0.604359799
## Transfers                0.0184533315  0.396838695    0.7707323895  0.604959884
## International.Transfers  0.0113596705  0.282507182    0.5298357848  0.401860972
## Investments             -0.0171003557  0.354797465    0.6971569979  0.555109298
## Loan.Amount             -0.0098203824  0.485774935    0.2849468803  0.228226333
## Loan.Term..Months.       0.0021245599 -0.002061079   -0.0238092117 -0.023958973
## Interest.Rate           -0.0179645419  0.319932567    0.1908740510  0.143217776
##                           Withdrawals   Transfers International.Transfers
## Age                      0.0006842343  0.01845333             0.011359671
## Income.Level             0.4134806872  0.39683870             0.282507182
## Account.Balance          0.7680698227  0.77073239             0.529835785
## Deposits                 0.6043597995  0.60495988             0.401860972
## Withdrawals              1.0000000000  0.58621658             0.400282653
## Transfers                0.5862165799  1.00000000             0.631149189
## International.Transfers  0.4002826534  0.63114919             1.000000000
## Investments              0.5410310215  0.53656419             0.374822796
## Loan.Amount              0.2360602592  0.21991683             0.175022788
## Loan.Term..Months.      -0.0234913521 -0.01112869            -0.005986568
## Interest.Rate            0.1511950911  0.14724767             0.123244216
##                         Investments  Loan.Amount Loan.Term..Months.
## Age                     -0.01710036 -0.009820382        0.002124560
## Income.Level             0.35479746  0.485774935       -0.002061079
## Account.Balance          0.69715700  0.284946880       -0.023809212
## Deposits                 0.55510930  0.228226333       -0.023958973
## Withdrawals              0.54103102  0.236060259       -0.023491352
## Transfers                0.53656419  0.219916828       -0.011128690
## International.Transfers  0.37482280  0.175022788       -0.005986568
## Investments              1.00000000  0.209617025       -0.024017817
## Loan.Amount              0.20961703  1.000000000        0.017708758
## Loan.Term..Months.      -0.02401782  0.017708758        1.000000000
## Interest.Rate            0.13895187  0.684915534        0.009669201
##                         Interest.Rate
## Age                      -0.017964542
## Income.Level              0.319932567
## Account.Balance           0.190874051
## Deposits                  0.143217776
## Withdrawals               0.151195091
## Transfers                 0.147247667
## International.Transfers   0.123244216
## Investments               0.138951873
## Loan.Amount               0.684915534
## Loan.Term..Months.        0.009669201
## Interest.Rate             1.000000000
# Plot the already-computed `corr_matrix` instead of recomputing it, so the
# figure always shows the same matrix (and NA handling) as the printed one.
corrplot::corrplot(corr_matrix, tl.col = "black", tl.srt = 45, tl.cex = 0.5)

2. Bartlett Test

# Bartlett's test on the correlation matrix; `n` is the number of observations.
cortest.bartlett(corr_matrix, n = nrow(data_numeric))
## $chisq
## [1] 23414.17
## 
## $p.value
## [1] 0
## 
## $df
## [1] 55

3. KMO Test

# Kaiser-Meyer-Olkin measure of sampling adequacy (psych::KMO).
kmo_result <- KMO(data_numeric)
kmo_result$MSA      # Overall MSA
## [1] 0.8381424
kmo_result$MSAi     # MSA per variable
##                     Age            Income.Level         Account.Balance 
##               0.4414030               0.8936623               0.7916738 
##                Deposits             Withdrawals               Transfers 
##               0.9007616               0.9056171               0.8687981 
## International.Transfers             Investments             Loan.Amount 
##               0.8784007               0.9273359               0.6621825 
##      Loan.Term..Months.           Interest.Rate 
##               0.7798015               0.6332531
# Report when the overall MSA exceeds 0.8; nothing is printed otherwise.
if (kmo_result$MSA > 0.8) cat("Nilai KMO menunjukkan kategori sangat baik.")
## Nilai KMO menunjukkan kategori sangat baik.

10. Penghapusan Variabel (Age)

# Drop Age before the factor methods -- presumably because its individual
# MSA is the lowest of all variables; confirm the intended cutoff.
data_new <- select(data_numeric, -Age)

# Re-check sampling adequacy on the reduced variable set.
KMO(data_new)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_new)
## Overall MSA =  0.84
## MSA for each item = 
##            Income.Level         Account.Balance                Deposits 
##                    0.89                    0.79                    0.90 
##             Withdrawals               Transfers International.Transfers 
##                    0.91                    0.87                    0.88 
##             Investments             Loan.Amount      Loan.Term..Months. 
##                    0.93                    0.66                    0.78 
##           Interest.Rate 
##                    0.63

11. Standardisasi Data

# Fit the PCA. `scale. = TRUE` rescales every variable to unit variance
# (prcomp also centers by default), so the standardization announced in this
# section happens inside prcomp() rather than as a separate step.
pca_result <- prcomp(data_new, scale. = TRUE)

12. Principal Component Analysis (PCA)

# Eigenvalues are the squared standard deviations of the components.
eigenvalues <- (pca_result$sdev)^2
# Share of the total variance carried by each component.
variance_ratio <- prop.table(eigenvalues)

# Assemble the variance table column by column.
hasil_pca <- data.frame(Eigenvalue = eigenvalues)
hasil_pca$Proporsi_Varians <- variance_ratio
hasil_pca$Kumulatif_Varians <- cumsum(variance_ratio)

round(hasil_pca, digits = 3)
##    Eigenvalue Proporsi_Varians Kumulatif_Varians
## 1       4.498            0.450             0.450
## 2       1.597            0.160             0.609
## 3       0.999            0.100             0.709
## 4       0.751            0.075             0.785
## 5       0.590            0.059             0.843
## 6       0.469            0.047             0.890
## 7       0.396            0.040             0.930
## 8       0.303            0.030             0.960
## 9       0.285            0.028             0.989
## 10      0.113            0.011             1.000

Scree Plot

# Scree plot of the PCA eigenvalues.
plot(
  eigenvalues,
  main = "Scree Plot PCA",
  xlab = "Komponen",
  ylab = "Eigenvalue",
  type = "b"
)
# Dashed reference line at eigenvalue = 1.
abline(h = 1, lty = 2, col = "red")

Berdasarkan scree plot, terjadi penurunan tajam dari komponen pertama ke kedua, kemudian relatif stabil setelah komponen ketiga, sehingga tiga komponen dianggap optimal.

Menentukan jumlah komponen

# Number of components whose eigenvalue exceeds 1.
sum(eigenvalues > 1)
## [1] 2

Kaiser Rule (Eigenvalue > 1)

# Count the components with eigenvalue above 1 and report the result.
jumlah_komponen <- sum(1 < eigenvalues)

cat(
  "Berdasarkan Kaiser Rule, jumlah komponen yang dipertahankan adalah:",
  jumlah_komponen
)
## Berdasarkan Kaiser Rule, jumlah komponen yang dipertahankan adalah: 2

PCA dengan 3 komponen

# Scores on the first three principal components.
# NOTE(review): the 3 is hard-coded and is not taken from `jumlah_komponen`;
# confirm which retention criterion is intended.
Z_pca <- pca_result$x[, seq_len(3)]
dim(Z_pca)
## [1] 4543    3

Loading Matrix

# Rotation (eigenvector) coefficients for the first three components.
loadings <- pca_result$rotation[, seq_len(3)]
round(loadings, digits = 3)
##                            PC1    PC2    PC3
## Income.Level             0.303  0.267 -0.013
## Account.Balance          0.438 -0.141  0.004
## Deposits                 0.376 -0.146 -0.005
## Withdrawals              0.372 -0.131 -0.006
## Transfers                0.387 -0.173  0.039
## International.Transfers  0.300 -0.142  0.057
## Investments              0.347 -0.141 -0.010
## Loan.Amount              0.220  0.623 -0.023
## Loan.Term..Months.      -0.011  0.055  0.996
## Interest.Rate            0.167  0.640 -0.038
# For each component, list the variables whose absolute coefficient is > 0.5.
# NOTE(review): `pca_result$rotation` holds unit-length eigenvectors, so with
# ten variables the coefficients are small by construction (PC1 returns none).
# A 0.5 cutoff is usually applied to coefficients scaled by the component
# standard deviation -- confirm which scale is intended.
apply(loadings, 2, function(x) names(x[abs(x) > 0.5]))
## $PC1
## character(0)
## 
## $PC2
## [1] "Loan.Amount"   "Interest.Rate"
## 
## $PC3
## [1] "Loan.Term..Months."

Cumulative Variance >=70%

# Index of the first component at which cumulative variance reaches 70%.
kum70 <- match(TRUE, cumsum(variance_ratio) >= 0.70)

cat(
  "Jumlah komponen dengan cumulative variance ≥ 70% adalah:",
  kum70
)
## Jumlah komponen dengan cumulative variance ≥ 70% adalah: 3

Total Variance Explained

# Render the variance table (rounded to three decimals) as a formatted table.
kableExtra::kable(round(hasil_pca,3))
Eigenvalue Proporsi_Varians Kumulatif_Varians
4.498 0.450 0.450
1.597 0.160 0.609
0.999 0.100 0.709
0.751 0.075 0.785
0.590 0.059 0.843
0.469 0.047 0.890
0.396 0.040 0.930
0.303 0.030 0.960
0.285 0.028 0.989
0.113 0.011 1.000

13. Factor Analysis (FA)

# Parallel analysis to suggest how many factors to extract; fa = "fa" asks
# for the factor solution only.
fa.parallel(data_new, fa = "fa")

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA
# Fit a three-factor model with varimax rotation and compute factor scores.
fa_result <- fa(
  data_new,
  nfactors = 3,
  rotate = "varimax",
  scores = TRUE
)

# Rotated factor loadings.
fa_result[["loadings"]]
## 
## Loadings:
##                         MR1    MR2    MR3   
## Income.Level             0.402  0.440  0.191
## Account.Balance          0.891  0.192  0.403
## Deposits                 0.716  0.151  0.296
## Withdrawals              0.694  0.163  0.294
## Transfers                0.544  0.128  0.647
## International.Transfers  0.229         0.764
## Investments              0.623  0.141  0.283
## Loan.Amount                     0.993       
## Loan.Term..Months.                          
## Interest.Rate                   0.680       
## 
##                  MR1   MR2   MR3
## SS loadings    2.693 1.774 1.467
## Proportion Var 0.269 0.177 0.147
## Cumulative Var 0.269 0.447 0.593
# Communality of each variable under the fitted factor model.
fa_result$communality
##            Income.Level         Account.Balance                Deposits 
##             0.392307372             0.993101143             0.623103852 
##             Withdrawals               Transfers International.Transfers 
##             0.594823549             0.730392145             0.645714917 
##             Investments             Loan.Amount      Loan.Term..Months. 
##             0.487932003             0.997746098             0.001737274 
##           Interest.Rate 
##             0.467673533
# Eigenvalues of the correlation matrix of the analysed variables.
# NOTE(review): these come from the plain correlation matrix, not from the
# fitted factor model, so this plot should match the PCA scree plot -- confirm
# that this is intended.
fa_eigen <- eigen(cor(data_new))[["values"]]
plot(fa_eigen, main = "Scree Plot FA", type = "b")
abline(h = 1, lty = 2, col = "red")

# Factor scores for each observation.
factor_scores <- fa_result[["scores"]]
head(factor_scores)
##              MR1        MR2         MR3
## [1,]  0.01373275 -0.6251408 -0.84715384
## [2,] -0.87610400 -0.3521884 -0.07621892
## [3,]  0.96299452  0.2149285 -0.22745068
## [4,]  0.20139972  0.7526182  0.66985115
## [5,] -0.09072745  0.8238452  0.06394933
## [6,]  1.39448313  0.7367044 -0.28951701

Communality Summary

# Communalities rounded to three decimals for reporting.
round(fa_result$communality,3)
##            Income.Level         Account.Balance                Deposits 
##                   0.392                   0.993                   0.623 
##             Withdrawals               Transfers International.Transfers 
##                   0.595                   0.730                   0.646 
##             Investments             Loan.Amount      Loan.Term..Months. 
##                   0.488                   0.998                   0.002 
##           Interest.Rate 
##                   0.468
# Average communality across all variables.
mean(fa_result$communality)
## [1] 0.5934532

14. Perbandingan PCA dan FA

# Report the number of retained components from the score matrix itself
# rather than a hard-coded literal, so the message cannot drift from `Z_pca`.
cat("Jumlah komponen PCA:", ncol(Z_pca), "\n")
## Jumlah komponen PCA: 3
# Report the number of factors from the fitted loadings matrix rather than a
# hard-coded literal, so the message cannot drift from `fa_result`.
cat("Jumlah faktor FA:", ncol(fa_result$loadings))
## Jumlah faktor FA: 3

15. Kesimpulan

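Hasil analisis menunjukkan bahwa Kaiser Rule (eigenvalue > 1) hanya menghasilkan dua komponen, tetapi tiga komponen utama dipertahankan karena kumulatif varians baru mencapai ≥ 70% (70,9%) pada komponen ketiga, dengan eigenvalue komponen ketiga (0,999) yang sangat mendekati 1. Analisis faktor dengan rotasi varimax memperjelas struktur laten variabel keuangan dalam dataset.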