Read Dataset

# Load the raw CSV into `df`; the path is relative to the working directory.
df <- read_csv("Global Economy Indicators.csv")

Import dataset

Dataset Global Economy Indicators
CountryID Country Year AMA exchange rate IMF based exchange rate Population Currency Per capita GNI Agriculture, hunting, forestry, fishing (ISIC A-B) Changes in inventories Construction (ISIC F) Exports of goods and services Final consumption expenditure General government final consumption expenditure Gross capital formation Gross fixed capital formation (including Acquisitions less disposals of valuables) Household consumption expenditure (including Non-profit institutions serving households) Imports of goods and services Manufacturing (ISIC D) Mining, Manufacturing, Utilities (ISIC C-E) Other Activities (ISIC J-P) Total Value Added Transport, storage and communication (ISIC I) Wholesale, retail trade, restaurants and hotels (ISIC G-H) Gross National Income(GNI) in USD Gross Domestic Product (GDP)
4 Afghanistan 1970 0.0449984 0.0449984 10752971 Afghani 164 869917407 NA 46793902 165618722 1663221269 112126986 94611818 94611818 1551094283 195277226 370146827 376690811 127747843 1731454254 83917200 226387091 1766527525 1731435587
4 Afghanistan 1971 0.0449984 0.0449984 11015857 Afghani 168 910828104 NA 48994113 193580300 1796541240 121114833 99012350 99012350 1675426407 276296480 387549502 394401164 133754097 1812857077 87860382 237019196 1850121913 1812837521
4 Afghanistan 1972 0.0449984 0.0449984 11286753 Afghani 149 827945340 NA 44535223 227654380 1607159399 108347543 103456794 103456794 1498811856 290370350 352284669 358512865 121582672 1647917912 79864525 215477287 1683947905 1647900178
4 Afghanistan 1973 0.0449984 0.0449984 11575305 Afghani 150 855486925 NA 46018542 226913554 1617037233 109013455 121728433 121728433 1508023800 262962880 364010279 370445793 125630236 1702734673 82528885 222624293 1739998153 1702716294
4 Afghanistan 1974 0.0449984 0.0449984 11869879 Afghani 177 1035913365 NA 55721659 284938449 1907408182 128588961 175061875 175061875 1778819221 305679151 440760406 448552790 152119162 2061751510 99918604 269525910 2106420227 2061729287
4 Afghanistan 1975 0.0449984 0.0449984 12157386 Afghani 195 1165441381 NA 62686658 300493815 2131358499 143686711 221728484 221728484 1987671788 333827202 495891889 504659018 171142804 2319778374 112416374 303432162 2369877014 2319753506

Data Understanding

# Size of the data: number of rows and columns
dim(df)
## [1] 10512    26
# Structure and type of every column
str(df)
## spc_tbl_ [10,512 × 26] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ CountryID                                                                               : num [1:10512] 4 4 4 4 4 4 4 4 4 4 ...
##  $ Country                                                                                 : chr [1:10512] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ Year                                                                                    : num [1:10512] 1970 1971 1972 1973 1974 ...
##  $ AMA exchange rate                                                                       : num [1:10512] 0.045 0.045 0.045 0.045 0.045 ...
##  $ IMF based exchange rate                                                                 : num [1:10512] 0.045 0.045 0.045 0.045 0.045 ...
##  $ Population                                                                              : num [1:10512] 10752971 11015857 11286753 11575305 11869879 ...
##  $ Currency                                                                                : chr [1:10512] "Afghani" "Afghani" "Afghani" "Afghani" ...
##  $ Per capita GNI                                                                          : num [1:10512] 164 168 149 150 177 195 207 231 254 285 ...
##  $ Agriculture, hunting, forestry, fishing (ISIC A-B)                                      : num [1:10512] 8.70e+08 9.11e+08 8.28e+08 8.55e+08 1.04e+09 ...
##  $ Changes in inventories                                                                  : num [1:10512] NA NA NA NA NA NA NA NA NA NA ...
##  $ Construction (ISIC F)                                                                   : num [1:10512] 46793902 48994113 44535223 46018542 55721659 ...
##  $ Exports of goods and services                                                           : num [1:10512] 1.66e+08 1.94e+08 2.28e+08 2.27e+08 2.85e+08 ...
##  $ Final consumption expenditure                                                           : num [1:10512] 1.66e+09 1.80e+09 1.61e+09 1.62e+09 1.91e+09 ...
##  $ General government final consumption expenditure                                        : num [1:10512] 1.12e+08 1.21e+08 1.08e+08 1.09e+08 1.29e+08 ...
##  $ Gross capital formation                                                                 : num [1:10512] 9.46e+07 9.90e+07 1.03e+08 1.22e+08 1.75e+08 ...
##  $ Gross fixed capital formation (including Acquisitions less disposals of valuables)      : num [1:10512] 9.46e+07 9.90e+07 1.03e+08 1.22e+08 1.75e+08 ...
##  $ Household consumption expenditure (including Non-profit institutions serving households): num [1:10512] 1.55e+09 1.68e+09 1.50e+09 1.51e+09 1.78e+09 ...
##  $ Imports of goods and services                                                           : num [1:10512] 1.95e+08 2.76e+08 2.90e+08 2.63e+08 3.06e+08 ...
##  $ Manufacturing (ISIC D)                                                                  : num [1:10512] 3.70e+08 3.88e+08 3.52e+08 3.64e+08 4.41e+08 ...
##  $ Mining, Manufacturing, Utilities (ISIC C-E)                                             : num [1:10512] 3.77e+08 3.94e+08 3.59e+08 3.70e+08 4.49e+08 ...
##  $ Other Activities (ISIC J-P)                                                             : num [1:10512] 1.28e+08 1.34e+08 1.22e+08 1.26e+08 1.52e+08 ...
##  $ Total Value Added                                                                       : num [1:10512] 1.73e+09 1.81e+09 1.65e+09 1.70e+09 2.06e+09 ...
##  $ Transport, storage and communication (ISIC I)                                           : num [1:10512] 83917200 87860382 79864525 82528885 99918604 ...
##  $ Wholesale, retail trade, restaurants and hotels (ISIC G-H)                              : num [1:10512] 2.26e+08 2.37e+08 2.15e+08 2.23e+08 2.70e+08 ...
##  $ Gross National Income(GNI) in USD                                                       : num [1:10512] 1.77e+09 1.85e+09 1.68e+09 1.74e+09 2.11e+09 ...
##  $ Gross Domestic Product (GDP)                                                            : num [1:10512] 1.73e+09 1.81e+09 1.65e+09 1.70e+09 2.06e+09 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   CountryID = col_double(),
##   ..   Country = col_character(),
##   ..   Year = col_double(),
##   ..   `AMA exchange rate` = col_double(),
##   ..   `IMF based exchange rate` = col_double(),
##   ..   Population = col_double(),
##   ..   Currency = col_character(),
##   ..   `Per capita GNI` = col_double(),
##   ..   `Agriculture, hunting, forestry, fishing (ISIC A-B)` = col_double(),
##   ..   `Changes in inventories` = col_double(),
##   ..   `Construction (ISIC F)` = col_double(),
##   ..   `Exports of goods and services` = col_double(),
##   ..   `Final consumption expenditure` = col_double(),
##   ..   `General government final consumption expenditure` = col_double(),
##   ..   `Gross capital formation` = col_double(),
##   ..   `Gross fixed capital formation (including Acquisitions less disposals of valuables)` = col_double(),
##   ..   `Household consumption expenditure (including Non-profit institutions serving households)` = col_double(),
##   ..   `Imports of goods and services` = col_double(),
##   ..   `Manufacturing (ISIC D)` = col_double(),
##   ..   `Mining, Manufacturing, Utilities (ISIC C-E)` = col_double(),
##   ..   `Other Activities (ISIC J-P)` = col_double(),
##   ..   `Total Value Added` = col_double(),
##   ..   `Transport, storage and communication (ISIC I)` = col_double(),
##   ..   `Wholesale, retail trade, restaurants and hotels (ISIC G-H)` = col_double(),
##   ..   `Gross National Income(GNI) in USD` = col_double(),
##   ..   `Gross Domestic Product (GDP)` = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Descriptive statistics for every column
summary(df)
##    CountryID       Country               Year      AMA exchange rate  
##  Min.   :  4.0   Length:10512       Min.   :1970   Min.   :0.000e+00  
##  1st Qu.:212.0   Class :character   1st Qu.:1984   1st Qu.:1.000e+00  
##  Median :430.0   Mode  :character   Median :1997   Median :2.813e+00  
##  Mean   :431.1                      Mean   :1996   Mean   :3.574e+02  
##  3rd Qu.:643.0                      3rd Qu.:2009   3rd Qu.:5.134e+01  
##  Max.   :894.0                      Max.   :2021   Max.   :1.116e+05  
##                                                                       
##  IMF based exchange rate   Population          Currency        
##  Min.   :    0.000       Min.   :4.359e+03   Length:10512      
##  1st Qu.:    1.000       1st Qu.:6.331e+05   Class :character  
##  Median :    2.761       Median :5.052e+06   Mode  :character  
##  Mean   :  341.985       Mean   :2.852e+07                     
##  3rd Qu.:   48.067       3rd Qu.:1.679e+07                     
##  Max.   :42000.000       Max.   :1.426e+09                     
##                                                                
##  Per capita GNI   Agriculture, hunting, forestry, fishing (ISIC A-B)
##  Min.   :    34   Min.   :2.814e+04                                 
##  1st Qu.:   730   1st Qu.:1.305e+08                                 
##  Median :  2316   Median :9.314e+08                                 
##  Mean   :  8966   Mean   :7.793e+09                                 
##  3rd Qu.:  8966   3rd Qu.:4.023e+09                                 
##  Max.   :234317   Max.   :1.350e+12                                 
##                   NA's   :121                                       
##  Changes in inventories Construction (ISIC F) Exports of goods and services
##  Min.   :-1.510e+11     Min.   :1.577e+05     Min.   :-1.670e+09           
##  1st Qu.: 4.674e+05     1st Qu.:6.816e+07     1st Qu.: 4.027e+08           
##  Median : 6.830e+07     Median :3.718e+08     Median : 2.408e+09           
##  Mean   : 1.627e+09     Mean   :1.002e+10     Mean   : 4.671e+10           
##  3rd Qu.: 6.911e+08     3rd Qu.:2.816e+09     3rd Qu.: 1.542e+10           
##  Max.   : 2.110e+11     Max.   :1.240e+12     Max.   : 3.530e+12           
##  NA's   :1841                                 NA's   :21                   
##  Final consumption expenditure General government final consumption expenditure
##  Min.   :2.044e+06             Min.   :5.001e+05                               
##  1st Qu.:1.210e+09             1st Qu.:2.132e+08                               
##  Median :6.524e+09             Median :1.122e+09                               
##  Mean   :1.358e+11             Mean   :3.085e+10                               
##  3rd Qu.:3.784e+10             3rd Qu.:7.423e+09                               
##  Max.   :1.930e+13             Max.   :3.350e+12                               
##                                NA's   :52                                      
##  Gross capital formation
##  Min.   :-4.397e+10     
##  1st Qu.: 2.808e+08     
##  Median : 1.766e+09     
##  Mean   : 4.654e+10     
##  3rd Qu.: 1.241e+10     
##  Max.   : 7.600e+12     
##  NA's   :52             
##  Gross fixed capital formation (including Acquisitions less disposals of valuables)
##  Min.   :2.920e+05                                                                 
##  1st Qu.:2.735e+08                                                                 
##  Median :1.632e+09                                                                 
##  Mean   :4.519e+10                                                                 
##  3rd Qu.:1.166e+10                                                                 
##  Max.   :7.430e+12                                                                 
##  NA's   :52                                                                        
##  Household consumption expenditure (including Non-profit institutions serving households)
##  Min.   :7.960e+05                                                                       
##  1st Qu.:9.390e+08                                                                       
##  Median :5.154e+09                                                                       
##  Mean   :1.055e+11                                                                       
##  3rd Qu.:3.009e+10                                                                       
##  Max.   :1.590e+13                                                                       
##  NA's   :52                                                                              
##  Imports of goods and services Manufacturing (ISIC D)
##  Min.   :1.982e+06             Min.   :-2.485e+05    
##  1st Qu.:5.891e+08             1st Qu.: 1.109e+08    
##  Median :2.892e+09             Median : 9.114e+08    
##  Mean   :4.592e+10             Mean   : 3.093e+10    
##  3rd Qu.:1.602e+10             3rd Qu.: 7.405e+09    
##  Max.   :3.400e+12             Max.   : 4.870e+12    
##  NA's   :42                    NA's   :43            
##  Mining, Manufacturing, Utilities (ISIC C-E) Other Activities (ISIC J-P)
##  Min.   :-2.581e+06                          Min.   :1.044e+06          
##  1st Qu.: 1.857e+08                          1st Qu.:3.556e+08          
##  Median : 1.597e+09                          Median :2.107e+09          
##  Mean   : 4.153e+10                          Mean   :7.474e+10          
##  3rd Qu.: 1.251e+10                          3rd Qu.:1.442e+10          
##  Max.   : 5.780e+12                          Max.   :1.290e+13          
##                                                                         
##  Total Value Added   Transport, storage and communication (ISIC I)
##  Min.   :2.411e+06   Min.   :-6.479e+06                           
##  1st Qu.:1.356e+09   1st Qu.: 8.999e+07                           
##  Median :7.494e+09   Median : 5.442e+08                           
##  Mean   :1.745e+11   Mean   : 1.557e+10                           
##  3rd Qu.:4.819e+10   3rd Qu.: 3.976e+09                           
##  Max.   :2.330e+13   Max.   : 2.490e+12                           
##                      NA's   :49                                   
##  Wholesale, retail trade, restaurants and hotels (ISIC G-H)
##  Min.   :2.302e+05                                         
##  1st Qu.:2.079e+08                                         
##  Median :1.056e+09                                         
##  Mean   :2.510e+10                                         
##  3rd Qu.:6.707e+09                                         
##  Max.   :3.520e+12                                         
##  NA's   :49                                                
##  Gross National Income(GNI) in USD Gross Domestic Product (GDP)
##  Min.   :3.565e+06                 Min.   :2.585e+06           
##  1st Qu.:1.411e+09                 1st Qu.:1.439e+09           
##  Median :7.887e+09                 Median :8.071e+09           
##  Mean   :1.825e+11                 Mean   :1.829e+11           
##  3rd Qu.:5.006e+10                 3rd Qu.:5.173e+10           
##  Max.   :2.360e+13                 Max.   :2.330e+13           
## 
# First and last year covered by the data (missing years ignored)
range(df$Year, na.rm = TRUE)
## [1] 1970 2021
# Number of missing values in each column
colSums(is.na(df))
##                                                                                CountryID 
##                                                                                        0 
##                                                                                  Country 
##                                                                                        0 
##                                                                                     Year 
##                                                                                        0 
##                                                                        AMA exchange rate 
##                                                                                        0 
##                                                                  IMF based exchange rate 
##                                                                                        0 
##                                                                               Population 
##                                                                                        0 
##                                                                                 Currency 
##                                                                                        0 
##                                                                           Per capita GNI 
##                                                                                        0 
##                                       Agriculture, hunting, forestry, fishing (ISIC A-B) 
##                                                                                      121 
##                                                                   Changes in inventories 
##                                                                                     1841 
##                                                                    Construction (ISIC F) 
##                                                                                        0 
##                                                            Exports of goods and services 
##                                                                                       21 
##                                                            Final consumption expenditure 
##                                                                                        0 
##                                         General government final consumption expenditure 
##                                                                                       52 
##                                                                  Gross capital formation 
##                                                                                       52 
##       Gross fixed capital formation (including Acquisitions less disposals of valuables) 
##                                                                                       52 
## Household consumption expenditure (including Non-profit institutions serving households) 
##                                                                                       52 
##                                                            Imports of goods and services 
##                                                                                       42 
##                                                                   Manufacturing (ISIC D) 
##                                                                                       43 
##                                              Mining, Manufacturing, Utilities (ISIC C-E) 
##                                                                                        0 
##                                                              Other Activities (ISIC J-P) 
##                                                                                        0 
##                                                                        Total Value Added 
##                                                                                        0 
##                                            Transport, storage and communication (ISIC I) 
##                                                                                       49 
##                               Wholesale, retail trade, restaurants and hotels (ISIC G-H) 
##                                                                                       49 
##                                                        Gross National Income(GNI) in USD 
##                                                                                        0 
##                                                             Gross Domestic Product (GDP) 
##                                                                                        0
# Data completeness per year: number of rows (one per country) and the
# percentage of missing cells across all non-grouping columns.
df %>%
  group_by(Year) %>%
  summarise(
    country_total = n(),
    # pick(everything()) replaces the deprecated zero-argument across();
    # is.na() on the picked data frame yields a logical matrix whose mean is
    # the share of missing cells in the group.
    missing_percent = mean(is.na(pick(everything()))) * 100
  ) %>%
  kable() %>%
  kable_styling(full_width = FALSE) %>%
  scroll_box(width = "100%", height = "400px")
Year country_total missing_percent
1970 187 1.1976048
1971 187 1.1976048
1972 187 1.1548332
1973 187 1.1120616
1974 187 1.1548332
1975 187 1.1120616
1976 187 1.1334474
1977 187 1.1334474
1978 187 1.1334474
1979 187 1.0692900
1980 187 1.0692900
1981 187 1.0479042
1982 187 1.0265184
1983 187 1.0265184
1984 187 1.0051326
1985 187 0.9837468
1986 187 1.0051326
1987 187 0.9409752
1988 187 0.9409752
1989 188 0.9572431
1990 216 0.8887243
1991 211 0.8150114
1992 211 0.9097801
1993 211 0.8529189
1994 210 0.8379356
1995 210 0.8569796
1996 210 0.8379356
1997 210 0.8188916
1998 210 0.8950676
1999 210 0.8950676
2000 210 0.8760236
2001 210 0.8379356
2002 210 0.7617597
2003 210 0.7808037
2004 210 0.6855837
2005 212 0.7168459
2006 212 0.6979815
2007 212 0.7168459
2008 214 0.7101476
2009 214 0.7475238
2010 214 0.7475238
2011 213 0.7322569
2012 213 0.7510327
2013 212 0.7357102
2014 212 0.7545746
2015 212 0.7734390
2016 212 0.7923033
2017 212 0.8111677
2018 212 0.8488964
2019 212 0.8866252
2020 212 0.9432183
2021 212 1.0375401
# Duplicate check: count rows that exactly repeat an earlier row
sum(duplicated(df))
## [1] 0

Data Cleaning

Filter Tahun 2021

Penelitian difokuskan pada tahun 2021 agar analisis merepresentasikan kondisi ekonomi global pada periode terbaru dalam dataset. Oleh karena itu, data difilter hanya untuk observasi tahun 2021.

# Keep only the 2021 observations, then confirm the resulting dimensions
df_2021 <- filter(df, Year == 2021)
dim(df_2021)
## [1] 212  26

Cek outlier Univariat

Outlier univariat dideteksi menggunakan metode boxplot berbasis IQR. Transformasi log (log1p) digunakan untuk mengurangi skewness pada data ekonomi yang biasanya memiliki rentang sangat besar.

# Numeric indicators only. Year is constant after the 2021 filter and
# CountryID is an identifier code, so neither is an indicator; dropping both
# leaves the indicator columns in their original order.
df_2021_numeric <- df_2021 %>%
  select(where(is.numeric)) %>%
  select(-CountryID, -Year)

# Short labels X1..Xk in column order
colnames(df_2021_numeric) <- paste0("X", seq_len(ncol(df_2021_numeric)))

# Univariate boxplots (IQR rule) on a log1p scale.
# Note: log1p() returns NaN (with a warning) for values below -1, so strongly
# negative entries do not appear in the plot.
df_2021_numeric %>%
  mutate(across(everything(), log1p)) %>%
  pivot_longer(everything()) %>%
  # Fix the axis order to X1, X2, ..., instead of the lexical X1, X10, X11, ...
  mutate(name = factor(name, levels = colnames(df_2021_numeric))) %>%
  ggplot(aes(x = name, y = value)) +
  geom_boxplot(
    fill = "steelblue",
    outlier.color = "red",
    outlier.size = 2
  ) +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 0)) +
  labs(
    title = "Deteksi Outlier Univariat (Boxplot - IQR)",
    x = "Indikator (X1–X22)",
    y = "Nilai (log scale)"
  )

Visualisasi di atas membantu mengidentifikasi nilai ekstrem pada masing-masing indikator.

Imputasi Missing Value

Tiga metode imputasi dibandingkan:

# Median imputation: in every numeric column, fill missing entries with that
# column's median (computed over the non-missing values).
df_median <- mutate(
  df_2021,
  across(
    where(is.numeric),
    function(col) ifelse(is.na(col), median(col, na.rm = TRUE), col)
  )
)

Mengganti nilai hilang dengan median variabel.

# Mean imputation: in every numeric column, fill missing entries with that
# column's mean (computed over the non-missing values).
df_mean <- mutate(
  df_2021,
  across(
    where(is.numeric),
    function(col) ifelse(is.na(col), mean(col, na.rm = TRUE), col)
  )
)

Mengganti nilai hilang dengan rata-rata variabel.

# KNN imputation (k = 5) on the numeric columns.
# CountryID is an identifier code, not an indicator, so it is removed before
# imputation; otherwise it would take part in the neighbour distance and stay
# in the imputed data set. The "_imp" indicator columns added by kNN() are
# dropped afterwards.
df_knn <- df_2021 %>%
  select(where(is.numeric)) %>%
  select(-CountryID) %>%
  kNN(k = 5) %>%
  select(-ends_with("_imp"))

Menggunakan metode K-Nearest Neighbors (k = 5) untuk memperkirakan nilai hilang berdasarkan kedekatan observasi.

Perbandingan ini dilakukan untuk menentukan metode imputasi terbaik berdasarkan uji kelayakan data.

Drop Variabel Non-Indikator

Variabel non-numerik dan identitas negara dihapus karena tidak relevan dalam analisis PCA dan FA.

# Remove identifier / non-indicator columns so that all three imputed data
# sets contain the same indicator variables.
df_med_clean  <- df_median %>% select(-Country, -Currency, -CountryID, -Year)
df_mean_clean <- df_mean   %>% select(-Country, -Currency, -CountryID, -Year)
# df_knn holds numeric columns only, but CountryID is numeric and must be
# removed too; any_of() tolerates a column that is already absent.
df_knn_clean  <- df_knn    %>% select(-any_of(c("CountryID", "Year")))

Hapus Varians Nol

Variabel dengan varians nol dihapus karena tidak memberikan informasi dalam analisis multivariat.

# Keep only the columns that carry variation.
#
# x: a data frame (or tibble).
# Returns `x` restricted to numeric columns whose standard deviation
# (ignoring NAs) is strictly positive. Columns that are constant, entirely
# missing, or non-numeric are dropped instead of raising an error.
clean_var <- function(x) {
  has_variation <- vapply(
    x,
    function(col) {
      # isTRUE() turns an NA standard deviation (all-NA or single-value
      # column) into FALSE instead of propagating NA
      is.numeric(col) && isTRUE(stats::sd(col, na.rm = TRUE) > 0)
    },
    logical(1)
  )
  x[has_variation]
}

# Drop zero-variance columns from each of the three imputed data sets
df_med_clean  <- df_med_clean  %>% clean_var()
df_mean_clean <- df_mean_clean %>% clean_var()
df_knn_clean  <- df_knn_clean  %>% clean_var()

Standardisasi

Data distandarisasi menggunakan fungsi scale() sehingga setiap variabel memiliki mean = 0 dan standar deviasi = 1. Standardisasi diperlukan karena PCA dan FA sensitif terhadap perbedaan skala variabel.

# Standardise every column with scale() and convert the resulting matrix back
# to a data frame.
df_med_scaled  <- df_med_clean  %>% scale() %>% as.data.frame()
df_mean_scaled <- df_mean_clean %>% scale() %>% as.data.frame()
df_knn_scaled  <- df_knn_clean  %>% scale() %>% as.data.frame()

Data

Data penelitian terdiri dari 22 indikator ekonomi yang digunakan dalam analisis Principal Component Analysis (PCA) dan Factor Analysis (FA). Berikut adalah daftar variabel yang digunakan:

Simbol Nama.Variabel
X1 AMA exchange rate
X2 IMF based exchange rate
X3 Population
X4 Per capita GNI
X5 Agriculture, hunting, forestry, fishing (ISIC A-B)
X6 Changes in inventories
X7 Construction (ISIC F)
X8 Exports of goods and services
X9 Final consumption expenditure
X10 General government final consumption expenditure
X11 Gross capital formation
X12 Gross fixed capital formation (including Acquisitions less disposals of valuables)
X13 Household consumption expenditure (including Non-profit institutions serving households)
X14 Imports of goods and services
X15 Manufacturing (ISIC D)
X16 Mining, Manufacturing, Utilities (ISIC C-E)
X17 Other Activities (ISIC J-P)
X18 Total Value Added
X19 Transport, storage and communication (ISIC I)
X20 Wholesale, retail trade, restaurants and hotels (ISIC G-H)
X21 Gross National Income (GNI) in USD
X22 Gross Domestic Product (GDP)

Cek outlier

Outlier multivariat dideteksi menggunakan Mahalanobis Distance. Cut-off ditentukan berdasarkan distribusi Chi-Square dengan derajat bebas sebesar jumlah variabel.

Observasi dengan nilai Mahalanobis melebihi cut-off dianggap sebagai outlier multivariat.

# Multivariate outlier check (Mahalanobis distance) on the median-imputed,
# standardised data
center <- colMeans(df_med_scaled)
cov_mat <- cov(df_med_scaled)

# Distance of every observation from the column means
mahal_dist <- mahalanobis(x = df_med_scaled, center = center, cov = cov_mat)

# Cut-off: 97.5% chi-square quantile, degrees of freedom = number of variables
cutoff <- qchisq(p = 0.975, df = ncol(df_med_scaled))

# Row positions whose distance exceeds the cut-off
outlier_index <- which(mahal_dist > cutoff)

length(outlier_index)
## [1] 33
# Row positions (within the 2021 data) flagged as multivariate outliers
outlier_index
##  [1]   8   9  22  33  40  63  70  81  84  85  86  88  90  93  98 108 120 121 132
## [20] 142 152 159 167 172 174 176 179 185 188 194 202 205 209
# Vertical-line plot of each observation's distance; the red horizontal line
# marks the chi-square cut-off
plot(mahal_dist,
     type = "h",
     main = "Mahalanobis Distance Plot",
     xlab = "Observasi",
     ylab = "Mahalanobis Distance")
abline(h = cutoff, col = "red", lwd = 2)

Berdasarkan deteksi outlier secara univariat dan multivariat, ditemukan beberapa observasi yang teridentifikasi sebagai outlier. Namun karena jumlahnya relatif besar dan masih merepresentasikan variasi alami antar negara, observasi tersebut tidak dihapus agar tidak menghilangkan informasi penting dalam analisis.

Uji Kelayakan Data

Kaiser–Meyer–Olkin (KMO)

KMO mengukur kecukupan sampel untuk analisis faktor.

Kriteria interpretasi: ≥ 0.90 → Sangat baik; 0.80–0.89 → Baik; 0.70–0.79 → Cukup; 0.60–0.69 → Kurang; < 0.60 → Tidak layak.

# KMO test for each imputation variant
kmo_med  <- KMO(df_med_scaled)
kmo_mean <- KMO(df_mean_scaled)
kmo_knn  <- KMO(df_knn_scaled)

# Collect the overall MSA value of each result into one comparison table
kmo_compare <- data.frame(
  Metode = c("Median", "Mean", "KNN"),
  KMO = vapply(
    list(kmo_med, kmo_mean, kmo_knn),
    function(res) res$MSA,
    numeric(1)
  )
)

kmo_compare %>%
  kable() %>%
  kable_styling(full_width = FALSE)
Metode KMO
Median 0.7436081
Mean 0.7529374
KNN 0.7392948

Meskipun nilai KMO metode mean sedikit lebih tinggi, imputasi median dipilih karena data ekonomi cenderung mengandung outlier. Mean sensitif terhadap nilai ekstrem, sedangkan median lebih robust dan lebih stabil dalam merepresentasikan kecenderungan sentral data. Oleh karena itu, median dinilai lebih sesuai untuk menjaga struktur asli data sebelum dilakukan PCA dan FA.

Bartlett

Bartlett menguji apakah matriks korelasi berbeda signifikan dari matriks identitas. Jika p-value < 0.05 → data layak untuk FA.

# Bartlett's test for each imputation variant
bart_med  <- cortest.bartlett(df_med_scaled)
bart_mean <- cortest.bartlett(df_mean_scaled)
bart_knn  <- cortest.bartlett(df_knn_scaled)

# Collect the p-value of each result into one comparison table
bart_compare <- data.frame(
  Metode = c("Median", "Mean", "KNN"),
  p_value = vapply(
    list(bart_med, bart_mean, bart_knn),
    function(res) res$p.value,
    numeric(1)
  )
)

bart_compare %>%
  kable() %>%
  kable_styling(full_width = FALSE)
Metode p_value
Median 0
Mean 0
KNN 0

Korelasi

Visualisasi korelasi dilakukan untuk melihat pola hubungan antar indikator ekonomi.

Adanya korelasi tinggi menunjukkan kemungkinan adanya struktur faktor laten.

# Correlation matrix of the standardised, median-imputed indicators
kor <- cor(df_med_scaled)

# Upper-triangle colour map with the coefficients printed in black
corrplot(
  kor,
  method = "color",
  type = "upper",
  addCoef.col = "black",
  number.cex = 0.7,
  tl.cex = 0.8
)

PCA (DATA FINAL MENGGUNAKAN MEDIAN)

Load PCA

PCA dilakukan menggunakan fungsi prcomp() pada data yang telah distandarisasi.

# PCA on the median-imputed data. The data were already standardised with
# scale(), so prcomp() is told not to rescale them again.
pca_final <- prcomp(df_med_scaled, scale. = FALSE)
# Standard deviation and (cumulative) proportion of variance per component
summary(pca_final)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5    PC6     PC7
## Standard deviation     4.0268 1.5081 1.28843 0.99357 0.62321 0.4965 0.37645
## Proportion of Variance 0.7371 0.1034 0.07546 0.04487 0.01765 0.0112 0.00644
## Cumulative Proportion  0.7371 0.8404 0.91590 0.96077 0.97843 0.9896 0.99607
##                            PC8    PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.22122 0.1244 0.09507 0.07229 0.04948 0.04821 0.03651
## Proportion of Variance 0.00222 0.0007 0.00041 0.00024 0.00011 0.00011 0.00006
## Cumulative Proportion  0.99830 0.9990 0.99941 0.99965 0.99976 0.99987 0.99993
##                           PC15    PC16    PC17     PC18     PC19      PC20
## Standard deviation     0.02956 0.02388 0.01222 0.005283 0.001145 0.0007479
## Proportion of Variance 0.00004 0.00003 0.00001 0.000000 0.000000 0.0000000
## Cumulative Proportion  0.99997 0.99999 1.00000 1.000000 1.000000 1.0000000
##                            PC21      PC22
## Standard deviation     0.000403 0.0002022
## Proportion of Variance 0.000000 0.0000000
## Cumulative Proportion  1.000000 1.0000000

Hitung Eigenvalue dari hasil prcomp

Eigenvalue dihitung dari kuadrat standar deviasi komponen utama. Eigenvalue menunjukkan jumlah variasi yang dijelaskan oleh setiap komponen.

# Eigenvalues = squared standard deviations of the principal components
eigen_values <- pca_final$sdev^2

Kaiser Criterion

Komponen dipilih jika memiliki eigenvalue > 1.

# Kaiser criterion: number of components with an eigenvalue above 1
sum(eigen_values > 1)
## [1] 3

Cumulative Variance

Menghitung proporsi dan kumulatif variasi yang dijelaskan oleh komponen utama.

# Share of total variance per component, then the running total
prop_var <- prop.table(eigen_values)
cum_var <- cumsum(prop_var)
cum_var
##  [1] 0.7370582 0.8404448 0.9159012 0.9607731 0.9784273 0.9896310 0.9960727
##  [8] 0.9982971 0.9990003 0.9994111 0.9996487 0.9997600 0.9998656 0.9999262
## [15] 0.9999659 0.9999919 0.9999986 0.9999999 1.0000000 1.0000000 1.0000000
## [22] 1.0000000

Scree Plot

Scree plot digunakan untuk mengidentifikasi titik elbow sebagai alternatif penentuan jumlah komponen.

# Scree plot of the PCA result, with value labels added (addlabels = TRUE)
fviz_eig(pca_final, addlabels = TRUE)

Contribution Variabel

Menunjukkan variabel mana yang paling berkontribusi pada PC1 dan PC2.

# Contribution of the variables to PC1 (top 15 shown).
# The dashed line marks the contribution (%) each variable would have if all
# contributed equally; it is derived from the number of variables in the PCA
# rather than hard-coded.
fviz_contrib(
  pca_final,
  choice = "var",
  axes = 1,
  top = 15,
  fill = "steelblue"
) +
  geom_hline(yintercept = 100 / nrow(pca_final$rotation), linetype = 2)

# Contribution of the variables to PC2 (top 15 shown), same reference line
fviz_contrib(
  pca_final,
  choice = "var",
  axes = 2,
  top = 15,
  fill = "steelblue"
) +
  geom_hline(yintercept = 100 / nrow(pca_final$rotation), linetype = 2)

Loading PCA

Loading menunjukkan korelasi antara variabel asli dan komponen utama.

# Variable plot of the PCA: variables are coloured by their contribution on a
# blue-orange-red gradient; repel = TRUE is passed to reduce label overlap.
fviz_pca_var(pca_final,
             col.var = "contrib",
             gradient.cols = c("blue","orange","red"),
             repel = TRUE)

PCA MANUAL (MEDIAN)

# Manual PCA: eigen-decompose the covariance matrix of df_med_scaled.
cov_mat <- cov(df_med_scaled)
eig_manual <- eigen(cov_mat)

# Eigenvalues and the matching eigenvectors (one column per component).
eigen_values_manual <- eig_manual$values
eigen_vectors_manual <- eig_manual$vectors

# Each eigenvalue as a share of the eigenvalue total, and the running total.
prop_var_manual <- eigen_values_manual / sum(eigen_values_manual)
cum_var_manual <- cumsum(prop_var_manual)

# Summary table with one row per component. seq_along() is used for the labels
# instead of 1:length(), which would produce c(1, 0) for an empty vector.
pca_manual <- data.frame(
  Komponen = paste0("PC", seq_along(eigen_values_manual)),
  Eigenvalue = eigen_values_manual,
  Proporsi = prop_var_manual,
  Kumulatif = cum_var_manual
)

# Render the manual PCA summary as a styled table that is not full width.
kable_styling(kable(pca_manual), full_width = FALSE)
Komponen Eigenvalue Proporsi Kumulatif
PC1 16.2152794 0.7370582 0.7370582
PC2 2.2745056 0.1033866 0.8404448
PC3 1.6600415 0.0754564 0.9159012
PC4 0.9871808 0.0448719 0.9607731
PC5 0.3883928 0.0176542 0.9784273
PC6 0.2464823 0.0112037 0.9896310
PC7 0.1417162 0.0064416 0.9960727
PC8 0.0489369 0.0022244 0.9982971
PC9 0.0154703 0.0007032 0.9990003
PC10 0.0090389 0.0004109 0.9994111
PC11 0.0052266 0.0002376 0.9996487
PC12 0.0024478 0.0001113 0.9997600
PC13 0.0023246 0.0001057 0.9998656
PC14 0.0013328 0.0000606 0.9999262
PC15 0.0008739 0.0000397 0.9999659
PC16 0.0005704 0.0000259 0.9999919
PC17 0.0001493 0.0000068 0.9999986
PC18 0.0000279 0.0000013 0.9999999
PC19 0.0000013 0.0000001 1.0000000
PC20 0.0000006 0.0000000 1.0000000
PC21 0.0000002 0.0000000 1.0000000
PC22 0.0000000 0.0000000 1.0000000
# Component scores: multiply the data matrix by the eigenvector matrix.
scores_pca_manual <- as.matrix(df_med_scaled) %*% eigen_vectors_manual
# Show the first six rows of the score matrix.
head(scores_pca_manual, n = 6L)
##           [,1]        [,2]       [,3]       [,4]          [,5]          [,6]
## [1,] 0.9004235  0.05877780 0.27018743 -0.6244589  0.0396896761 -0.0036307050
## [2,] 0.9314044  0.16200982 0.16020171 -0.4156180 -0.0922514766 -0.1307730500
## [3,] 0.5246835 -0.08441405 0.35949071 -0.4586007 -0.0639643027  0.0001875561
## [4,] 0.9266989  0.43298766 0.04928632  0.7332024  0.2180295981 -0.3256663295
## [5,] 0.7942831 -0.04994436 0.17697253 -0.5428798  0.0003539282 -0.0157782489
## [6,] 0.9614402  0.24339753 0.14050150 -0.1440502 -0.0350638940 -0.1870070519
##             [,7]         [,8]          [,9]        [,10]        [,11]
## [1,] -0.08364019  0.034466137 -0.0177028130 -0.043133450 -0.039212126
## [2,] -0.05205450  0.027330484  0.0080991108  0.026044955 -0.001634054
## [3,] -0.18120661 -0.085107606 -0.0175093865  0.041663003  0.038968272
## [4,] -0.09050471 -0.027470648  0.0021297420 -0.007725320 -0.013454682
## [5,] -0.03174048  0.003393824 -0.0006248975 -0.033945374  0.019844532
## [6,] -0.07421066  0.010253036  0.0042237660  0.007338755 -0.010164589
##             [,12]        [,13]        [,14]        [,15]        [,16]
## [1,]  0.024369993 -0.014146473 -0.001306507  0.024413201 -0.002352774
## [2,]  0.001386426  0.002547264  0.001514197 -0.011078662 -0.005025632
## [3,]  0.045179399 -0.003628634 -0.033445316  0.023108627  0.023788100
## [4,]  0.001958845  0.005176775  0.007850534 -0.004750845 -0.006181017
## [5,] -0.001966991 -0.036140211 -0.005267282  0.039935478 -0.012960562
## [6,]  0.005168492 -0.001429394  0.002656469 -0.003531774 -0.006977464
##              [,17]         [,18]         [,19]         [,20]         [,21]
## [1,]  0.0009698838 -0.0027126927  1.720017e-04  0.0002565293  2.278286e-04
## [2,] -0.0016964745  0.0021087536  2.037386e-04  0.0002072516 -8.101792e-05
## [3,]  0.0034269602  0.0004365102 -1.103219e-03 -0.0001453891 -8.365906e-05
## [4,]  0.0002028357  0.0011149791  2.219595e-04 -0.0001484701 -3.060593e-05
## [5,]  0.0039967225  0.0028070864 -5.339321e-05 -0.0002025450 -7.509037e-05
## [6,] -0.0002649622  0.0012519337  1.830709e-04  0.0002145693 -6.217044e-05
##              [,22]
## [1,]  3.466698e-05
## [2,] -3.285652e-05
## [3,]  1.615282e-04
## [4,] -9.593451e-05
## [5,]  1.881789e-05
## [6,] -5.504854e-05

FA (DATA FINAL MENGGUNAKAN MEDIAN)

Tentukan Jumlah Faktor

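Jumlah faktor ditentukan menggunakan Parallel Analysis. Metode ini membandingkan eigenvalue dari data aktual dengan eigenvalue dari data acak berukuran sama; faktor dipertahankan selama eigenvalue aktualnya lebih besar daripada eigenvalue acak.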

# Parallel analysis on the factor-analysis eigenvalues only (fa = "fa"),
# with the diagnostic plot drawn.
# NOTE(review): the comparison eigenvalues come from randomly generated data,
# so the suggested number of factors can vary between runs unless a seed is
# set upstream -- confirm.
fa_par <- fa.parallel(
  x = df_med_scaled,
  fa = "fa",
  plot = TRUE
)

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA
# Number of factors suggested by the parallel analysis.
k <- fa_par[["nfact"]]

FA (MEDIAN)

FA dilakukan menggunakan metode Principal Axis Factoring (PAF) dengan rotasi varimax. Rotasi varimax digunakan untuk mempermudah interpretasi faktor.

# Factor analysis with k factors, extracted by principal axis factoring
# (fm = "pa") and rotated with varimax.
fa_result <- fa(
  r = df_med_scaled,
  nfactors = k,
  fm = "pa",
  rotate = "varimax"
)

# Print the factor loadings, hiding entries below 0.4 in absolute value.
print(fa_result[["loadings"]], cutoff = 0.4)
## 
## Loadings:
##     PA1    PA3    PA2   
## X1                 0.979
## X2                 0.922
## X3          0.783       
## X4                      
## X5          0.956       
## X6          0.837       
## X7   0.733  0.677       
## X8   0.678  0.609       
## X9   0.949              
## X10  0.865  0.485       
## X11  0.682  0.722       
## X12  0.691  0.712       
## X13  0.958              
## X14  0.795  0.507       
## X15  0.615  0.769       
## X16  0.644  0.752       
## X17  0.962              
## X18  0.885  0.463       
## X19  0.939              
## X20  0.935              
## X21  0.889  0.458       
## X22  0.884  0.466       
## 
##                   PA1   PA3   PA2
## SS loadings    11.152 6.845 1.896
## Proportion Var  0.507 0.311 0.086
## Cumulative Var  0.507 0.818 0.904

FA MANUAL

# Parallel analysis on the factor-analysis eigenvalues only, without a plot.
# NOTE(review): this repeats the parallel analysis run in the FA section; since
# the comparison data are random, the suggested k could differ between the two
# calls unless a seed is set -- consider reusing the earlier result; confirm.
fa_par <- fa.parallel(
  x = df_med_scaled,
  fa = "fa",
  plot = FALSE
)
## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA
# Number of factors suggested by the parallel analysis.
k <- fa_par[["nfact"]]

# Manual, single-pass principal-axis extraction of k factors (no rotation is
# applied in this block).

# Correlation matrix of the analysed variables
R <- cor(df_med_scaled)

# Initial communalities: squared multiple correlations, 1 - 1 / diag(R^-1)
# NOTE(review): the manual PCA eigenvalue table earlier in this report shows
# several eigenvalues close to zero, so R is presumably near-singular and
# solve() may be numerically unstable here -- confirm.
invR <- solve(R)
smc <- 1 - 1 / diag(invR)

# Reduced correlation matrix: the unit diagonal is replaced by the communalities
R_reduced <- R
diag(R_reduced) <- smc

# Eigen decomposition of the reduced matrix
eig_fa <- eigen(R_reduced)

# Keep the first k eigenvalues and eigenvectors. seq_len(k) selects nothing
# when k is 0 (1:k would give c(1, 0)), and drop = FALSE keeps V_fa a matrix
# when k is 1 so that sweep() below still has a column dimension to work on.
lambda_fa <- eig_fa$values[seq_len(k)]
V_fa <- eig_fa$vectors[, seq_len(k), drop = FALSE]

# Factor loadings: each eigenvector scaled by the square root of its eigenvalue
loadings_fa_manual <- sweep(V_fa, 2, sqrt(lambda_fa), "*")

# Unique variance: 1 minus the row sums of squared loadings (communalities)
psi <- 1 - rowSums(loadings_fa_manual^2)