library(readr)
library(dplyr)
library(ggplot2)
library(corrplot)
library(psych)
library(FactoMineR)
library(factoextra)
# Load 5k.csv into a tibble.
# NOTE(review): absolute, machine-specific path; the script only runs where
# the file exists at exactly this location.
finance_data <- read_csv(file = "C:/Users/LENOVO/Downloads/5k.csv")
## Rows: 5000 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (17): Occupation, Risk Tolerance, Investment Goals, Income Level, Addres...
## dbl (2): Age, Loan Term (Months)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Standardise the column names: make.names() replaces spaces and symbols
# with dots so that every name is syntactically valid.
colnames(finance_data) <- make.names(colnames(finance_data))
# Preview the first six rows.
head(finance_data, n = 6L)
## # A tibble: 6 × 19
## Age Occupation Risk.Tolerance Investment.Goals Income.Level Address
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 40 Lawyer High Wealth Preservation $46044.94 "7168 Moody …
## 2 30 Teacher Low Wealth Preservation $57169.50 "50001 Hecto…
## 3 37 Teacher Low Speculation $71760.86 "997 James I…
## 4 27 Student Medium Speculation $-25488.15 "1607 Joshua…
## 5 36 Engineer Low Income Generation $106777.95 "96690 Campb…
## 6 77 Doctor Low Income Generation $59157.22 "23020 Jacks…
## # ℹ 13 more variables: Account.Balance <chr>, Deposits <chr>,
## # Withdrawals <chr>, Transfers <chr>, International.Transfers <chr>,
## # Investments <chr>, Loan.Amount <chr>, Loan.Purpose <chr>,
## # Employment.Status <chr>, Loan.Term..Months. <dbl>, Interest.Rate <chr>,
## # Loan.Status <chr>, Transaction.Description <chr>
# Inspect the column types as imported, before any cleaning.
str(object = finance_data)
## spc_tbl_ [5,000 × 19] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Age : num [1:5000] 40 30 37 27 36 77 70 45 56 61 ...
## $ Occupation : chr [1:5000] "Lawyer" "Teacher" "Teacher" "Student" ...
## $ Risk.Tolerance : chr [1:5000] "High" "Low" "Low" "Medium" ...
## $ Investment.Goals : chr [1:5000] "Wealth Preservation" "Wealth Preservation" "Speculation" "Speculation" ...
## $ Income.Level : chr [1:5000] "$46044.94" "$57169.50" "$71760.86" "$-25488.15" ...
## $ Address : chr [1:5000] "7168 Moody Meadow\nHernandezshire, PW 06016" "50001 Hector Square\nWest Luisfurt, MA 51935" "997 James Isle\nNorth Rebeccafurt, RI 13366" "1607 Joshua Camp Apt. 634\nConleymouth, CT 66479" ...
## $ Account.Balance : chr [1:5000] "$44653.26" "$29175.47" "$86141.59" "$1000.00" ...
## $ Deposits : chr [1:5000] "$9156.01" "$5933.22" "$22583.11" "$299.47" ...
## $ Withdrawals : chr [1:5000] "$9327.70" "$8671.60" "$16468.58" "$289.09" ...
## $ Transfers : chr [1:5000] "$3647.92" "$6729.86" "$6032.53" "$109.83" ...
## $ International.Transfers: chr [1:5000] "$82.80" "$819.69" "$1526.33" "$3.51" ...
## $ Investments : chr [1:5000] "$8729.30" "$4545.18" "$8251.45" "$195.73" ...
## $ Loan.Amount : chr [1:5000] "$27010.93" "$31266.97" "$41260.58" "$5000.00" ...
## $ Loan.Purpose : chr [1:5000] "Medical Expenses" "Auto Purchase" "Auto Purchase" "Small Business" ...
## $ Employment.Status : chr [1:5000] "Retired" "Retired" "Employed" "Retired" ...
## $ Loan.Term..Months. : num [1:5000] 36 36 12 60 24 24 24 48 48 12 ...
## $ Interest.Rate : chr [1:5000] "11.94%" "8.08%" "13.07%" "6.73%" ...
## $ Loan.Status : chr [1:5000] "pending" "approved" "pending" "approved" ...
## $ Transaction.Description: chr [1:5000] "Electronics transaction of $706.18 at Sanders, Roberts and Hughes" "Transaction at Evans-Smith for $2250.03" "Purchase at Taylor-Gutierrez for $615.85 on 2024-02-21" "Travel transaction of $4852.39 at Jones-Russell" ...
## - attr(*, "spec")=
## .. cols(
## .. Age = col_double(),
## .. Occupation = col_character(),
## .. `Risk Tolerance` = col_character(),
## .. `Investment Goals` = col_character(),
## .. `Income Level` = col_character(),
## .. Address = col_character(),
## .. `Account Balance` = col_character(),
## .. Deposits = col_character(),
## .. Withdrawals = col_character(),
## .. Transfers = col_character(),
## .. `International Transfers` = col_character(),
## .. Investments = col_character(),
## .. `Loan Amount` = col_character(),
## .. `Loan Purpose` = col_character(),
## .. `Employment Status` = col_character(),
## .. `Loan Term (Months)` = col_double(),
## .. `Interest Rate` = col_character(),
## .. `Loan Status` = col_character(),
## .. `Transaction Description` = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Columns imported as text such as "$46044.94"; converted to numeric below.
currency_columns <- c(
  "Income.Level", "Account.Balance", "Deposits", "Withdrawals",
  "Transfers", "International.Transfers", "Investments", "Loan.Amount"
)
# Strip every "$" and "," in one pass, then parse the remainder as a number.
for (col in currency_columns) {
  finance_data[[col]] <- as.numeric(gsub("[$,]", "", finance_data[[col]]))
}
# Interest rates arrive as "11.94%": drop the percent sign and rescale to a
# fraction (0.1194).
finance_data$Interest.Rate <- as.numeric(
  gsub("%", "", finance_data$Interest.Rate, fixed = TRUE)
) / 100
# Confirm the converted column types.
str(finance_data)
## spc_tbl_ [5,000 × 19] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Age : num [1:5000] 40 30 37 27 36 77 70 45 56 61 ...
## $ Occupation : chr [1:5000] "Lawyer" "Teacher" "Teacher" "Student" ...
## $ Risk.Tolerance : chr [1:5000] "High" "Low" "Low" "Medium" ...
## $ Investment.Goals : chr [1:5000] "Wealth Preservation" "Wealth Preservation" "Speculation" "Speculation" ...
## $ Income.Level : num [1:5000] 46045 57170 71761 -25488 106778 ...
## $ Address : chr [1:5000] "7168 Moody Meadow\nHernandezshire, PW 06016" "50001 Hector Square\nWest Luisfurt, MA 51935" "997 James Isle\nNorth Rebeccafurt, RI 13366" "1607 Joshua Camp Apt. 634\nConleymouth, CT 66479" ...
## $ Account.Balance : num [1:5000] 44653 29175 86142 1000 77919 ...
## $ Deposits : num [1:5000] 9156 5933 22583 299 13853 ...
## $ Withdrawals : num [1:5000] 9328 8672 16469 289 22699 ...
## $ Transfers : num [1:5000] 3648 6730 6033 110 12189 ...
## $ International.Transfers: num [1:5000] 82.8 819.69 1526.33 3.51 2152.51 ...
## $ Investments : num [1:5000] 8729 4545 8251 196 9725 ...
## $ Loan.Amount : num [1:5000] 27011 31267 41261 5000 50000 ...
## $ Loan.Purpose : chr [1:5000] "Medical Expenses" "Auto Purchase" "Auto Purchase" "Small Business" ...
## $ Employment.Status : chr [1:5000] "Retired" "Retired" "Employed" "Retired" ...
## $ Loan.Term..Months. : num [1:5000] 36 36 12 60 24 24 24 48 48 12 ...
## $ Interest.Rate : num [1:5000] 0.1194 0.0808 0.1307 0.0673 0.106 ...
## $ Loan.Status : chr [1:5000] "pending" "approved" "pending" "approved" ...
## $ Transaction.Description: chr [1:5000] "Electronics transaction of $706.18 at Sanders, Roberts and Hughes" "Transaction at Evans-Smith for $2250.03" "Purchase at Taylor-Gutierrez for $615.85 on 2024-02-21" "Travel transaction of $4852.39 at Jones-Russell" ...
## - attr(*, "spec")=
## .. cols(
## .. Age = col_double(),
## .. Occupation = col_character(),
## .. `Risk Tolerance` = col_character(),
## .. `Investment Goals` = col_character(),
## .. `Income Level` = col_character(),
## .. Address = col_character(),
## .. `Account Balance` = col_character(),
## .. Deposits = col_character(),
## .. Withdrawals = col_character(),
## .. Transfers = col_character(),
## .. `International Transfers` = col_character(),
## .. Investments = col_character(),
## .. `Loan Amount` = col_character(),
## .. `Loan Purpose` = col_character(),
## .. `Employment Status` = col_character(),
## .. `Loan Term (Months)` = col_double(),
## .. `Interest Rate` = col_character(),
## .. `Loan Status` = col_character(),
## .. `Transaction Description` = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Drop four character columns that the rest of the script does not use.
finance_data <- select(
  finance_data,
  -c(Occupation, Investment.Goals, Address, Transaction.Description)
)
head(finance_data, n = 6L)
## # A tibble: 6 × 15
## Age Risk.Tolerance Income.Level Account.Balance Deposits Withdrawals
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 40 High 46045. 44653. 9156. 9328.
## 2 30 Low 57170. 29175. 5933. 8672.
## 3 37 Low 71761. 86142. 22583. 16469.
## 4 27 Medium -25488. 1000 299. 289.
## 5 36 Low 106778. 77919. 13853. 22699.
## 6 77 Low 59157. 61521. 22053. 16970.
## # ℹ 9 more variables: Transfers <dbl>, International.Transfers <dbl>,
## # Investments <dbl>, Loan.Amount <dbl>, Loan.Purpose <chr>,
## # Employment.Status <chr>, Loan.Term..Months. <dbl>, Interest.Rate <dbl>,
## # Loan.Status <chr>
# Number of missing values in each column.
colSums(x = is.na(finance_data))
## Age Risk.Tolerance Income.Level
## 0 0 0
## Account.Balance Deposits Withdrawals
## 0 0 0
## Transfers International.Transfers Investments
## 0 0 0
## Loan.Amount Loan.Purpose Employment.Status
## 0 0 0
## Loan.Term..Months. Interest.Rate Loan.Status
## 0 0 0
# Number of rows that exactly repeat an earlier row.
sum(duplicated(x = finance_data))
## [1] 0
# Numeric columns that go through outlier filtering and boxplots below.
numerical_columns <- c(
  "Age",
  "Income.Level",
  "Account.Balance",
  "Deposits",
  "Withdrawals",
  "Transfers",
  "International.Transfers",
  "Investments",
  "Loan.Amount",
  "Loan.Term..Months.",
  "Interest.Rate"
)
#' Remove rows whose value in one column is an IQR outlier
#'
#' Keeps the rows where `column` lies inside
#' `[Q1 - k * IQR, Q3 + k * IQR]`. The quartiles are computed from `data`
#' itself, ignoring missing values.
#'
#' @param data A data frame or tibble.
#' @param column Name of a numeric column of `data`, as a single string.
#' @param k Non-negative fence multiplier. The default of 1.5 reproduces the
#'   previously hard-coded rule.
#' @return `data` restricted to the in-range rows. Rows where `column` is
#'   `NA` are dropped, because `filter()` only keeps rows whose condition
#'   evaluates to `TRUE`.
remove_outliers_iqr <- function(data, column, k = 1.5) {
  # Fail early with a clear message instead of an obscure error further down.
  stopifnot(
    is.character(column), length(column) == 1L,
    column %in% names(data),
    is.numeric(data[[column]]),
    is.numeric(k), length(k) == 1L, k >= 0
  )
  quartiles <- quantile(
    data[[column]],
    probs = c(0.25, 0.75),
    na.rm = TRUE,
    names = FALSE
  )
  # Named `spread` rather than `IQR` so the local does not mask stats::IQR().
  spread <- quartiles[2] - quartiles[1]
  lower_bound <- quartiles[1] - k * spread
  upper_bound <- quartiles[2] + k * spread
  data %>%
    filter(.data[[column]] >= lower_bound, .data[[column]] <= upper_bound)
}
# Apply the IQR filter one column at a time. Each pass receives the rows that
# survived the previous pass, so later fences are computed on already-trimmed
# data.
finance_data <- Reduce(remove_outliers_iqr, numerical_columns, finance_data)
# Rows and columns remaining after filtering.
dim(finance_data)
## [1] 4543 15
# One boxplot per numeric column of the filtered data.
for (col in numerical_columns) {
  print(
    ggplot(data = finance_data, mapping = aes(y = .data[[col]])) +
      geom_boxplot() +
      labs(title = paste("Boxplot of", col)) +
      theme_minimal()
  )
}
# Numeric subset of the data, followed by descriptive statistics from
# psych::describe().
data_numeric <- select(finance_data, where(is.numeric))
describe(data_numeric)
## vars n mean sd median trimmed mad
## Age 1 4543 51.75 19.56 52.00 51.82 25.20
## Income.Level 2 4543 69032.56 29050.63 68605.30 68926.58 29949.56
## Account.Balance 3 4543 58729.19 32853.32 57378.22 59886.26 47846.11
## Deposits 4 4543 17551.08 12488.10 14623.09 16310.36 12424.93
## Withdrawals 5 4543 10361.58 7569.72 8511.19 9590.10 7601.88
## Transfers 6 4543 8462.06 5873.72 7233.53 7912.89 6126.04
## International.Transfers 7 4543 1153.67 1051.45 817.95 1002.50 897.17
## Investments 8 4543 8317.43 5819.26 6984.33 7762.11 5775.76
## Loan.Amount 9 4543 37561.36 15201.27 47406.86 39682.80 3844.59
## Loan.Term..Months. 10 4543 36.24 17.06 36.00 36.29 17.79
## Interest.Rate 11 4543 0.11 0.02 0.11 0.11 0.02
## min max range skew kurtosis se
## Age 18.00 85.00 67.00 -0.02 -1.20 0.29
## Income.Level -10338.99 151827.63 162166.62 0.04 -0.33 431.01
## Account.Balance 1000.00 100000.00 99000.00 -0.07 -1.41 487.43
## Deposits 109.34 49992.09 49882.75 0.74 -0.35 185.28
## Withdrawals 50.50 29996.72 29946.22 0.75 -0.37 112.31
## Transfers 51.99 24991.81 24939.82 0.71 -0.30 87.14
## International.Transfers 0.01 4378.21 4378.20 1.09 0.39 15.60
## Investments 51.22 26656.02 26604.80 0.78 -0.06 86.34
## Loan.Amount 5000.00 50000.00 45000.00 -0.81 -0.85 225.53
## Loan.Term..Months. 12.00 60.00 48.00 -0.01 -1.31 0.25
## Interest.Rate 0.05 0.15 0.10 -0.30 -0.46 0.00
# One 10-bin histogram per numeric column.
for (col in names(data_numeric)) {
  print(
    ggplot(data = data_numeric, mapping = aes(x = .data[[col]])) +
      geom_histogram(bins = 10) +
      labs(title = paste("Distribusi", col)) +
      theme_minimal()
  )
}
# Keep numeric columns only
data_numeric <- finance_data[, vapply(finance_data, is.numeric, logical(1))]
# Make sure no column name is empty or duplicated
names(data_numeric) <- make.names(names(data_numeric), unique = TRUE)
# Build the correlation matrix
corr_matrix <- cor(data_numeric, use = "complete.obs")
print(corr_matrix)
## Age Income.Level Account.Balance Deposits
## Age 1.0000000000 -0.015117950 0.0005111657 -0.001758733
## Income.Level -0.0151179497 1.000000000 0.5230815794 0.412883956
## Account.Balance 0.0005111657 0.523081579 1.0000000000 0.783699871
## Deposits -0.0017587329 0.412883956 0.7836998713 1.000000000
## Withdrawals 0.0006842343 0.413480687 0.7680698227 0.604359799
## Transfers 0.0184533315 0.396838695 0.7707323895 0.604959884
## International.Transfers 0.0113596705 0.282507182 0.5298357848 0.401860972
## Investments -0.0171003557 0.354797465 0.6971569979 0.555109298
## Loan.Amount -0.0098203824 0.485774935 0.2849468803 0.228226333
## Loan.Term..Months. 0.0021245599 -0.002061079 -0.0238092117 -0.023958973
## Interest.Rate -0.0179645419 0.319932567 0.1908740510 0.143217776
## Withdrawals Transfers International.Transfers
## Age 0.0006842343 0.01845333 0.011359671
## Income.Level 0.4134806872 0.39683870 0.282507182
## Account.Balance 0.7680698227 0.77073239 0.529835785
## Deposits 0.6043597995 0.60495988 0.401860972
## Withdrawals 1.0000000000 0.58621658 0.400282653
## Transfers 0.5862165799 1.00000000 0.631149189
## International.Transfers 0.4002826534 0.63114919 1.000000000
## Investments 0.5410310215 0.53656419 0.374822796
## Loan.Amount 0.2360602592 0.21991683 0.175022788
## Loan.Term..Months. -0.0234913521 -0.01112869 -0.005986568
## Interest.Rate 0.1511950911 0.14724767 0.123244216
## Investments Loan.Amount Loan.Term..Months.
## Age -0.01710036 -0.009820382 0.002124560
## Income.Level 0.35479746 0.485774935 -0.002061079
## Account.Balance 0.69715700 0.284946880 -0.023809212
## Deposits 0.55510930 0.228226333 -0.023958973
## Withdrawals 0.54103102 0.236060259 -0.023491352
## Transfers 0.53656419 0.219916828 -0.011128690
## International.Transfers 0.37482280 0.175022788 -0.005986568
## Investments 1.00000000 0.209617025 -0.024017817
## Loan.Amount 0.20961703 1.000000000 0.017708758
## Loan.Term..Months. -0.02401782 0.017708758 1.000000000
## Interest.Rate 0.13895187 0.684915534 0.009669201
## Interest.Rate
## Age -0.017964542
## Income.Level 0.319932567
## Account.Balance 0.190874051
## Deposits 0.143217776
## Withdrawals 0.151195091
## Transfers 0.147247667
## International.Transfers 0.123244216
## Investments 0.138951873
## Loan.Amount 0.684915534
## Loan.Term..Months. 0.009669201
## Interest.Rate 1.000000000
# Determinant of the correlation matrix
det_corr <- det(corr_matrix)
print(det_corr)
## [1] 0.005740848
# Correlation matrix of the numeric columns, with cor()'s default NA handling.
cor(x = data_numeric)
## Age Income.Level Account.Balance Deposits
## Age 1.0000000000 -0.015117950 0.0005111657 -0.001758733
## Income.Level -0.0151179497 1.000000000 0.5230815794 0.412883956
## Account.Balance 0.0005111657 0.523081579 1.0000000000 0.783699871
## Deposits -0.0017587329 0.412883956 0.7836998713 1.000000000
## Withdrawals 0.0006842343 0.413480687 0.7680698227 0.604359799
## Transfers 0.0184533315 0.396838695 0.7707323895 0.604959884
## International.Transfers 0.0113596705 0.282507182 0.5298357848 0.401860972
## Investments -0.0171003557 0.354797465 0.6971569979 0.555109298
## Loan.Amount -0.0098203824 0.485774935 0.2849468803 0.228226333
## Loan.Term..Months. 0.0021245599 -0.002061079 -0.0238092117 -0.023958973
## Interest.Rate -0.0179645419 0.319932567 0.1908740510 0.143217776
## Withdrawals Transfers International.Transfers
## Age 0.0006842343 0.01845333 0.011359671
## Income.Level 0.4134806872 0.39683870 0.282507182
## Account.Balance 0.7680698227 0.77073239 0.529835785
## Deposits 0.6043597995 0.60495988 0.401860972
## Withdrawals 1.0000000000 0.58621658 0.400282653
## Transfers 0.5862165799 1.00000000 0.631149189
## International.Transfers 0.4002826534 0.63114919 1.000000000
## Investments 0.5410310215 0.53656419 0.374822796
## Loan.Amount 0.2360602592 0.21991683 0.175022788
## Loan.Term..Months. -0.0234913521 -0.01112869 -0.005986568
## Interest.Rate 0.1511950911 0.14724767 0.123244216
## Investments Loan.Amount Loan.Term..Months.
## Age -0.01710036 -0.009820382 0.002124560
## Income.Level 0.35479746 0.485774935 -0.002061079
## Account.Balance 0.69715700 0.284946880 -0.023809212
## Deposits 0.55510930 0.228226333 -0.023958973
## Withdrawals 0.54103102 0.236060259 -0.023491352
## Transfers 0.53656419 0.219916828 -0.011128690
## International.Transfers 0.37482280 0.175022788 -0.005986568
## Investments 1.00000000 0.209617025 -0.024017817
## Loan.Amount 0.20961703 1.000000000 0.017708758
## Loan.Term..Months. -0.02401782 0.017708758 1.000000000
## Interest.Rate 0.13895187 0.684915534 0.009669201
## Interest.Rate
## Age -0.017964542
## Income.Level 0.319932567
## Account.Balance 0.190874051
## Deposits 0.143217776
## Withdrawals 0.151195091
## Transfers 0.147247667
## International.Transfers 0.123244216
## Investments 0.138951873
## Loan.Amount 0.684915534
## Loan.Term..Months. 0.009669201
## Interest.Rate 1.000000000
# Plot the same correlation matrix that the Bartlett test below is run on,
# rather than recomputing it with cor() under different NA handling.
corrplot::corrplot(corr_matrix, tl.col = "black", tl.srt = 45, tl.cex = 0.5)
# Bartlett's test on the correlation matrix, with the number of rows of
# data_numeric as the sample size.
cortest.bartlett(corr_matrix, n = nrow(data_numeric))
## $chisq
## [1] 23414.17
##
## $p.value
## [1] 0
##
## $df
## [1] 55
# Kaiser-Meyer-Olkin measure of sampling adequacy on all numeric columns.
kmo_result <- KMO(data_numeric)
kmo_result[["MSA"]] # Overall
## [1] 0.8381424
kmo_result[["MSAi"]] # Per variable
## Age Income.Level Account.Balance
## 0.4414030 0.8936623 0.7916738
## Deposits Withdrawals Transfers
## 0.9007616 0.9056171 0.8687981
## International.Transfers Investments Loan.Amount
## 0.8784007 0.9273359 0.6621825
## Loan.Term..Months. Interest.Rate
## 0.7798015 0.6332531
# Report the overall KMO. Every range gets a message, so a value of 0.8 or
# below is reported instead of printing nothing. The message for values above
# 0.8 is unchanged.
if (kmo_result$MSA > 0.8) {
  cat("Nilai KMO menunjukkan kategori sangat baik.")
} else if (kmo_result$MSA >= 0.5) {
  cat("Nilai KMO menunjukkan kategori cukup.")
} else {
  cat("Nilai KMO menunjukkan data tidak layak untuk analisis faktor.")
}
## Nilai KMO menunjukkan kategori sangat baik.
# Drop Age and re-check sampling adequacy.
# NOTE(review): presumably because Age has the lowest individual MSA (0.44)
# in the KMO output above; confirm the intended cut-off.
data_new <- select(data_numeric, -Age)
KMO(data_new)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_new)
## Overall MSA = 0.84
## MSA for each item =
## Income.Level Account.Balance Deposits
## 0.89 0.79 0.90
## Withdrawals Transfers International.Transfers
## 0.91 0.87 0.88
## Investments Loan.Amount Loan.Term..Months.
## 0.93 0.66 0.78
## Interest.Rate
## 0.63
# PCA on the variables scaled to unit variance.
pca_result <- prcomp(data_new, scale. = TRUE)
# Eigenvalues are the squared component standard deviations; dividing by
# their total gives each component's share of the variance.
eigenvalues <- pca_result[["sdev"]]^2
variance_ratio <- prop.table(eigenvalues)
# Summary table: eigenvalue, proportion of variance, cumulative proportion.
hasil_pca <- data.frame(
  Eigenvalue = eigenvalues,
  Proporsi_Varians = variance_ratio
)
hasil_pca$Kumulatif_Varians <- cumsum(hasil_pca$Proporsi_Varians)
round(hasil_pca, digits = 3)
## Eigenvalue Proporsi_Varians Kumulatif_Varians
## 1 4.498 0.450 0.450
## 2 1.597 0.160 0.609
## 3 0.999 0.100 0.709
## 4 0.751 0.075 0.785
## 5 0.590 0.059 0.843
## 6 0.469 0.047 0.890
## 7 0.396 0.040 0.930
## 8 0.303 0.030 0.960
## 9 0.285 0.028 0.989
## 10 0.113 0.011 1.000
# Scree plot of the PCA eigenvalues, with a dashed red reference line at an
# eigenvalue of 1.
plot(
  eigenvalues,
  type = "b",
  main = "Scree Plot PCA",
  xlab = "Komponen",
  ylab = "Eigenvalue"
)
abline(h = 1, lty = 2, col = "red")
Berdasarkan scree plot, terjadi penurunan tajam dari komponen pertama ke kedua, kemudian relatif stabil setelah komponen ketiga, sehingga tiga komponen dianggap optimal.
# Kaiser rule: count the components whose eigenvalue exceeds 1.
jumlah_komponen <- sum(eigenvalues > 1)
jumlah_komponen
## [1] 2
cat(
  "Berdasarkan Kaiser Rule, jumlah komponen yang dipertahankan adalah:",
  jumlah_komponen
)
## Berdasarkan Kaiser Rule, jumlah komponen yang dipertahankan adalah: 2
# Number of components to keep: the smallest count whose cumulative share of
# variance reaches 70%, derived from variance_ratio instead of a literal 3.
n_komponen_pca <- which(cumsum(variance_ratio) >= 0.70)[1]
# Component scores. drop = FALSE keeps a matrix even if only one component
# qualifies.
Z_pca <- pca_result$x[, seq_len(n_komponen_pca), drop = FALSE]
dim(Z_pca)
## [1] 4543 3
# Loadings (rotation matrix) of the retained components.
loadings <- pca_result$rotation[, seq_len(n_komponen_pca), drop = FALSE]
round(loadings, 3)
## PC1 PC2 PC3
## Income.Level 0.303 0.267 -0.013
## Account.Balance 0.438 -0.141 0.004
## Deposits 0.376 -0.146 -0.005
## Withdrawals 0.372 -0.131 -0.006
## Transfers 0.387 -0.173 0.039
## International.Transfers 0.300 -0.142 0.057
## Investments 0.347 -0.141 -0.010
## Loan.Amount 0.220 0.623 -0.023
## Loan.Term..Months. -0.011 0.055 0.996
## Interest.Rate 0.167 0.640 -0.038
# Variables whose absolute loading exceeds 0.5 on each retained component.
# Building the result per column always yields a named list, whereas apply()
# collapses to a matrix or vector whenever every component returns the same
# number of names.
lapply(
  setNames(nm = colnames(loadings)),
  function(pc) rownames(loadings)[abs(loadings[, pc]) > 0.5]
)
## $PC1
## character(0)
##
## $PC2
## [1] "Loan.Amount" "Interest.Rate"
##
## $PC3
## [1] "Loan.Term..Months."
# Index of the first component at which cumulative variance reaches 70%.
kum70 <- match(TRUE, cumsum(variance_ratio) >= 0.70)
cat("Jumlah komponen dengan cumulative variance ≥ 70% adalah:", kum70)
## Jumlah komponen dengan cumulative variance ≥ 70% adalah: 3
# Render the PCA summary table, rounded to three decimals.
kableExtra::kable(round(hasil_pca, digits = 3))
| Eigenvalue | Proporsi_Varians | Kumulatif_Varians |
|---|---|---|
| 4.498 | 0.450 | 0.450 |
| 1.597 | 0.160 | 0.609 |
| 0.999 | 0.100 | 0.709 |
| 0.751 | 0.075 | 0.785 |
| 0.590 | 0.059 | 0.843 |
| 0.469 | 0.047 | 0.890 |
| 0.396 | 0.040 | 0.930 |
| 0.303 | 0.030 | 0.960 |
| 0.285 | 0.028 | 0.989 |
| 0.113 | 0.011 | 1.000 |
# Parallel analysis to suggest the number of factors.
fa.parallel(data_new, fa = "fa")
## Parallel analysis suggests that the number of factors = 3 and the number of components = NA
# Three-factor model with varimax rotation; factor scores are requested.
fa_result <- fa(
  data_new,
  nfactors = 3,
  rotate = "varimax",
  scores = TRUE
)
fa_result[["loadings"]]
##
## Loadings:
## MR1 MR2 MR3
## Income.Level 0.402 0.440 0.191
## Account.Balance 0.891 0.192 0.403
## Deposits 0.716 0.151 0.296
## Withdrawals 0.694 0.163 0.294
## Transfers 0.544 0.128 0.647
## International.Transfers 0.229 0.764
## Investments 0.623 0.141 0.283
## Loan.Amount 0.993
## Loan.Term..Months.
## Interest.Rate 0.680
##
## MR1 MR2 MR3
## SS loadings 2.693 1.774 1.467
## Proportion Var 0.269 0.177 0.147
## Cumulative Var 0.269 0.447 0.593
# Communality of each variable under the fitted factor model.
fa_result[["communality"]]
## Income.Level Account.Balance Deposits
## 0.392307372 0.993101143 0.623103852
## Withdrawals Transfers International.Transfers
## 0.594823549 0.730392145 0.645714917
## Investments Loan.Amount Loan.Term..Months.
## 0.487932003 0.997746098 0.001737274
## Interest.Rate
## 0.467673533
# Eigenvalues of the correlation matrix, drawn as a scree plot with a dashed
# red reference line at 1.
fa_eigen <- eigen(cor(data_new))[["values"]]
plot(fa_eigen, main = "Scree Plot FA", type = "b")
abline(h = 1, lty = 2, col = "red")
# Factor scores from the fitted model; show the first six rows.
factor_scores <- fa_result[["scores"]]
head(factor_scores, n = 6L)
## MR1 MR2 MR3
## [1,] 0.01373275 -0.6251408 -0.84715384
## [2,] -0.87610400 -0.3521884 -0.07621892
## [3,] 0.96299452 0.2149285 -0.22745068
## [4,] 0.20139972 0.7526182 0.66985115
## [5,] -0.09072745 0.8238452 0.06394933
## [6,] 1.39448313 0.7367044 -0.28951701
# Communalities rounded to three decimals.
round(fa_result[["communality"]], digits = 3)
## Income.Level Account.Balance Deposits
## 0.392 0.993 0.623
## Withdrawals Transfers International.Transfers
## 0.595 0.730 0.646
## Investments Loan.Amount Loan.Term..Months.
## 0.488 0.998 0.002
## Interest.Rate
## 0.468
# Average communality across variables.
mean(fa_result$communality)
## [1] 0.5934532
# Report the counts from the fitted objects rather than literal numbers, so
# the summary cannot drift from what was actually retained.
cat("Jumlah komponen PCA:", ncol(Z_pca), "\n")
## Jumlah komponen PCA: 3
cat("Jumlah faktor FA:", ncol(fa_result$loadings))
## Jumlah faktor FA: 3
Hasil analisis menunjukkan bahwa tiga komponen utama mampu menjelaskan sekitar 70,9% variasi data, sesuai kriteria varians kumulatif ≥ 70% dan pola scree plot, sedangkan Kaiser Rule (eigenvalue > 1) hanya mempertahankan dua komponen. Analisis faktor dengan rotasi varimax memperjelas struktur laten variabel keuangan dalam dataset.