Read Dataset

# Load the raw indicator table from the CSV file (path is relative).
df <- readr::read_csv(file = "Global Economy Indicators.csv")

Import dataset

Dataset Global Economy Indicators
CountryID	Country	Year	AMA exchange rate	IMF based exchange rate	Population	Currency	Per capita GNI	Agriculture, hunting, forestry, fishing (ISIC A-B)	Changes in inventories	Construction (ISIC F)	Exports of goods and services	Final consumption expenditure	General government final consumption expenditure	Gross capital formation	Gross fixed capital formation (including Acquisitions less disposals of valuables)	Household consumption expenditure (including Non-profit institutions serving households)	Imports of goods and services	Manufacturing (ISIC D)	Mining, Manufacturing, Utilities (ISIC C-E)	Other Activities (ISIC J-P)	Total Value Added	Transport, storage and communication (ISIC I)	Wholesale, retail trade, restaurants and hotels (ISIC G-H)	Gross National Income(GNI) in USD	Gross Domestic Product (GDP)
4	Afghanistan	1970	0.0449984	0.0449984	10752971	Afghani	164	869917407	NA	46793902	165618722	1663221269	112126986	94611818	94611818	1551094283	195277226	370146827	376690811	127747843	1731454254	83917200	226387091	1766527525	1731435587
4	Afghanistan	1971	0.0449984	0.0449984	11015857	Afghani	168	910828104	NA	48994113	193580300	1796541240	121114833	99012350	99012350	1675426407	276296480	387549502	394401164	133754097	1812857077	87860382	237019196	1850121913	1812837521
4	Afghanistan	1972	0.0449984	0.0449984	11286753	Afghani	149	827945340	NA	44535223	227654380	1607159399	108347543	103456794	103456794	1498811856	290370350	352284669	358512865	121582672	1647917912	79864525	215477287	1683947905	1647900178
4	Afghanistan	1973	0.0449984	0.0449984	11575305	Afghani	150	855486925	NA	46018542	226913554	1617037233	109013455	121728433	121728433	1508023800	262962880	364010279	370445793	125630236	1702734673	82528885	222624293	1739998153	1702716294
4	Afghanistan	1974	0.0449984	0.0449984	11869879	Afghani	177	1035913365	NA	55721659	284938449	1907408182	128588961	175061875	175061875	1778819221	305679151	440760406	448552790	152119162	2061751510	99918604	269525910	2106420227	2061729287
4	Afghanistan	1975	0.0449984	0.0449984	12157386	Afghani	195	1165441381	NA	62686658	300493815	2131358499	143686711	221728484	221728484	1987671788	333827202	495891889	504659018	171142804	2319778374	112416374	303432162	2369877014	2319753506

Data Understanding

# Size of the data set (rows, columns)
df %>% dim()

## [1] 10512    26

# Structure and column types
df %>% str()

## spc_tbl_ [10,512 × 26] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ CountryID                                                                               : num [1:10512] 4 4 4 4 4 4 4 4 4 4 ...
##  $ Country                                                                                 : chr [1:10512] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ Year                                                                                    : num [1:10512] 1970 1971 1972 1973 1974 ...
##  $ AMA exchange rate                                                                       : num [1:10512] 0.045 0.045 0.045 0.045 0.045 ...
##  $ IMF based exchange rate                                                                 : num [1:10512] 0.045 0.045 0.045 0.045 0.045 ...
##  $ Population                                                                              : num [1:10512] 10752971 11015857 11286753 11575305 11869879 ...
##  $ Currency                                                                                : chr [1:10512] "Afghani" "Afghani" "Afghani" "Afghani" ...
##  $ Per capita GNI                                                                          : num [1:10512] 164 168 149 150 177 195 207 231 254 285 ...
##  $ Agriculture, hunting, forestry, fishing (ISIC A-B)                                      : num [1:10512] 8.70e+08 9.11e+08 8.28e+08 8.55e+08 1.04e+09 ...
##  $ Changes in inventories                                                                  : num [1:10512] NA NA NA NA NA NA NA NA NA NA ...
##  $ Construction (ISIC F)                                                                   : num [1:10512] 46793902 48994113 44535223 46018542 55721659 ...
##  $ Exports of goods and services                                                           : num [1:10512] 1.66e+08 1.94e+08 2.28e+08 2.27e+08 2.85e+08 ...
##  $ Final consumption expenditure                                                           : num [1:10512] 1.66e+09 1.80e+09 1.61e+09 1.62e+09 1.91e+09 ...
##  $ General government final consumption expenditure                                        : num [1:10512] 1.12e+08 1.21e+08 1.08e+08 1.09e+08 1.29e+08 ...
##  $ Gross capital formation                                                                 : num [1:10512] 9.46e+07 9.90e+07 1.03e+08 1.22e+08 1.75e+08 ...
##  $ Gross fixed capital formation (including Acquisitions less disposals of valuables)      : num [1:10512] 9.46e+07 9.90e+07 1.03e+08 1.22e+08 1.75e+08 ...
##  $ Household consumption expenditure (including Non-profit institutions serving households): num [1:10512] 1.55e+09 1.68e+09 1.50e+09 1.51e+09 1.78e+09 ...
##  $ Imports of goods and services                                                           : num [1:10512] 1.95e+08 2.76e+08 2.90e+08 2.63e+08 3.06e+08 ...
##  $ Manufacturing (ISIC D)                                                                  : num [1:10512] 3.70e+08 3.88e+08 3.52e+08 3.64e+08 4.41e+08 ...
##  $ Mining, Manufacturing, Utilities (ISIC C-E)                                             : num [1:10512] 3.77e+08 3.94e+08 3.59e+08 3.70e+08 4.49e+08 ...
##  $ Other Activities (ISIC J-P)                                                             : num [1:10512] 1.28e+08 1.34e+08 1.22e+08 1.26e+08 1.52e+08 ...
##  $ Total Value Added                                                                       : num [1:10512] 1.73e+09 1.81e+09 1.65e+09 1.70e+09 2.06e+09 ...
##  $ Transport, storage and communication (ISIC I)                                           : num [1:10512] 83917200 87860382 79864525 82528885 99918604 ...
##  $ Wholesale, retail trade, restaurants and hotels (ISIC G-H)                              : num [1:10512] 2.26e+08 2.37e+08 2.15e+08 2.23e+08 2.70e+08 ...
##  $ Gross National Income(GNI) in USD                                                       : num [1:10512] 1.77e+09 1.85e+09 1.68e+09 1.74e+09 2.11e+09 ...
##  $ Gross Domestic Product (GDP)                                                            : num [1:10512] 1.73e+09 1.81e+09 1.65e+09 1.70e+09 2.06e+09 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   CountryID = col_double(),
##   ..   Country = col_character(),
##   ..   Year = col_double(),
##   ..   `AMA exchange rate` = col_double(),
##   ..   `IMF based exchange rate` = col_double(),
##   ..   Population = col_double(),
##   ..   Currency = col_character(),
##   ..   `Per capita GNI` = col_double(),
##   ..   `Agriculture, hunting, forestry, fishing (ISIC A-B)` = col_double(),
##   ..   `Changes in inventories` = col_double(),
##   ..   `Construction (ISIC F)` = col_double(),
##   ..   `Exports of goods and services` = col_double(),
##   ..   `Final consumption expenditure` = col_double(),
##   ..   `General government final consumption expenditure` = col_double(),
##   ..   `Gross capital formation` = col_double(),
##   ..   `Gross fixed capital formation (including Acquisitions less disposals of valuables)` = col_double(),
##   ..   `Household consumption expenditure (including Non-profit institutions serving households)` = col_double(),
##   ..   `Imports of goods and services` = col_double(),
##   ..   `Manufacturing (ISIC D)` = col_double(),
##   ..   `Mining, Manufacturing, Utilities (ISIC C-E)` = col_double(),
##   ..   `Other Activities (ISIC J-P)` = col_double(),
##   ..   `Total Value Added` = col_double(),
##   ..   `Transport, storage and communication (ISIC I)` = col_double(),
##   ..   `Wholesale, retail trade, restaurants and hotels (ISIC G-H)` = col_double(),
##   ..   `Gross National Income(GNI) in USD` = col_double(),
##   ..   `Gross Domestic Product (GDP)` = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

# Descriptive statistics for every column
df %>% summary()

##    CountryID       Country               Year      AMA exchange rate  
##  Min.   :  4.0   Length:10512       Min.   :1970   Min.   :0.000e+00  
##  1st Qu.:212.0   Class :character   1st Qu.:1984   1st Qu.:1.000e+00  
##  Median :430.0   Mode  :character   Median :1997   Median :2.813e+00  
##  Mean   :431.1                      Mean   :1996   Mean   :3.574e+02  
##  3rd Qu.:643.0                      3rd Qu.:2009   3rd Qu.:5.134e+01  
##  Max.   :894.0                      Max.   :2021   Max.   :1.116e+05  
##                                                                       
##  IMF based exchange rate   Population          Currency        
##  Min.   :    0.000       Min.   :4.359e+03   Length:10512      
##  1st Qu.:    1.000       1st Qu.:6.331e+05   Class :character  
##  Median :    2.761       Median :5.052e+06   Mode  :character  
##  Mean   :  341.985       Mean   :2.852e+07                     
##  3rd Qu.:   48.067       3rd Qu.:1.679e+07                     
##  Max.   :42000.000       Max.   :1.426e+09                     
##                                                                
##  Per capita GNI   Agriculture, hunting, forestry, fishing (ISIC A-B)
##  Min.   :    34   Min.   :2.814e+04                                 
##  1st Qu.:   730   1st Qu.:1.305e+08                                 
##  Median :  2316   Median :9.314e+08                                 
##  Mean   :  8966   Mean   :7.793e+09                                 
##  3rd Qu.:  8966   3rd Qu.:4.023e+09                                 
##  Max.   :234317   Max.   :1.350e+12                                 
##                   NA's   :121                                       
##  Changes in inventories Construction (ISIC F) Exports of goods and services
##  Min.   :-1.510e+11     Min.   :1.577e+05     Min.   :-1.670e+09           
##  1st Qu.: 4.674e+05     1st Qu.:6.816e+07     1st Qu.: 4.027e+08           
##  Median : 6.830e+07     Median :3.718e+08     Median : 2.408e+09           
##  Mean   : 1.627e+09     Mean   :1.002e+10     Mean   : 4.671e+10           
##  3rd Qu.: 6.911e+08     3rd Qu.:2.816e+09     3rd Qu.: 1.542e+10           
##  Max.   : 2.110e+11     Max.   :1.240e+12     Max.   : 3.530e+12           
##  NA's   :1841                                 NA's   :21                   
##  Final consumption expenditure General government final consumption expenditure
##  Min.   :2.044e+06             Min.   :5.001e+05                               
##  1st Qu.:1.210e+09             1st Qu.:2.132e+08                               
##  Median :6.524e+09             Median :1.122e+09                               
##  Mean   :1.358e+11             Mean   :3.085e+10                               
##  3rd Qu.:3.784e+10             3rd Qu.:7.423e+09                               
##  Max.   :1.930e+13             Max.   :3.350e+12                               
##                                NA's   :52                                      
##  Gross capital formation
##  Min.   :-4.397e+10     
##  1st Qu.: 2.808e+08     
##  Median : 1.766e+09     
##  Mean   : 4.654e+10     
##  3rd Qu.: 1.241e+10     
##  Max.   : 7.600e+12     
##  NA's   :52             
##  Gross fixed capital formation (including Acquisitions less disposals of valuables)
##  Min.   :2.920e+05                                                                 
##  1st Qu.:2.735e+08                                                                 
##  Median :1.632e+09                                                                 
##  Mean   :4.519e+10                                                                 
##  3rd Qu.:1.166e+10                                                                 
##  Max.   :7.430e+12                                                                 
##  NA's   :52                                                                        
##  Household consumption expenditure (including Non-profit institutions serving households)
##  Min.   :7.960e+05                                                                       
##  1st Qu.:9.390e+08                                                                       
##  Median :5.154e+09                                                                       
##  Mean   :1.055e+11                                                                       
##  3rd Qu.:3.009e+10                                                                       
##  Max.   :1.590e+13                                                                       
##  NA's   :52                                                                              
##  Imports of goods and services Manufacturing (ISIC D)
##  Min.   :1.982e+06             Min.   :-2.485e+05    
##  1st Qu.:5.891e+08             1st Qu.: 1.109e+08    
##  Median :2.892e+09             Median : 9.114e+08    
##  Mean   :4.592e+10             Mean   : 3.093e+10    
##  3rd Qu.:1.602e+10             3rd Qu.: 7.405e+09    
##  Max.   :3.400e+12             Max.   : 4.870e+12    
##  NA's   :42                    NA's   :43            
##  Mining, Manufacturing, Utilities (ISIC C-E) Other Activities (ISIC J-P)
##  Min.   :-2.581e+06                          Min.   :1.044e+06          
##  1st Qu.: 1.857e+08                          1st Qu.:3.556e+08          
##  Median : 1.597e+09                          Median :2.107e+09          
##  Mean   : 4.153e+10                          Mean   :7.474e+10          
##  3rd Qu.: 1.251e+10                          3rd Qu.:1.442e+10          
##  Max.   : 5.780e+12                          Max.   :1.290e+13          
##                                                                         
##  Total Value Added   Transport, storage and communication (ISIC I)
##  Min.   :2.411e+06   Min.   :-6.479e+06                           
##  1st Qu.:1.356e+09   1st Qu.: 8.999e+07                           
##  Median :7.494e+09   Median : 5.442e+08                           
##  Mean   :1.745e+11   Mean   : 1.557e+10                           
##  3rd Qu.:4.819e+10   3rd Qu.: 3.976e+09                           
##  Max.   :2.330e+13   Max.   : 2.490e+12                           
##                      NA's   :49                                   
##  Wholesale, retail trade, restaurants and hotels (ISIC G-H)
##  Min.   :2.302e+05                                         
##  1st Qu.:2.079e+08                                         
##  Median :1.056e+09                                         
##  Mean   :2.510e+10                                         
##  3rd Qu.:6.707e+09                                         
##  Max.   :3.520e+12                                         
##  NA's   :49                                                
##  Gross National Income(GNI) in USD Gross Domestic Product (GDP)
##  Min.   :3.565e+06                 Min.   :2.585e+06           
##  1st Qu.:1.411e+09                 1st Qu.:1.439e+09           
##  Median :7.887e+09                 Median :8.071e+09           
##  Mean   :1.825e+11                 Mean   :1.829e+11           
##  3rd Qu.:5.006e+10                 3rd Qu.:5.173e+10           
##  Max.   :2.360e+13                 Max.   :2.330e+13           
##

# First and last year covered by the data
range(df[["Year"]], na.rm = TRUE)

## [1] 1970 2021

# Number of missing values per column
df %>%
  is.na() %>%
  colSums()

##                                                                                CountryID 
##                                                                                        0 
##                                                                                  Country 
##                                                                                        0 
##                                                                                     Year 
##                                                                                        0 
##                                                                        AMA exchange rate 
##                                                                                        0 
##                                                                  IMF based exchange rate 
##                                                                                        0 
##                                                                               Population 
##                                                                                        0 
##                                                                                 Currency 
##                                                                                        0 
##                                                                           Per capita GNI 
##                                                                                        0 
##                                       Agriculture, hunting, forestry, fishing (ISIC A-B) 
##                                                                                      121 
##                                                                   Changes in inventories 
##                                                                                     1841 
##                                                                    Construction (ISIC F) 
##                                                                                        0 
##                                                            Exports of goods and services 
##                                                                                       21 
##                                                            Final consumption expenditure 
##                                                                                        0 
##                                         General government final consumption expenditure 
##                                                                                       52 
##                                                                  Gross capital formation 
##                                                                                       52 
##       Gross fixed capital formation (including Acquisitions less disposals of valuables) 
##                                                                                       52 
## Household consumption expenditure (including Non-profit institutions serving households) 
##                                                                                       52 
##                                                            Imports of goods and services 
##                                                                                       42 
##                                                                   Manufacturing (ISIC D) 
##                                                                                       43 
##                                              Mining, Manufacturing, Utilities (ISIC C-E) 
##                                                                                        0 
##                                                              Other Activities (ISIC J-P) 
##                                                                                        0 
##                                                                        Total Value Added 
##                                                                                        0 
##                                            Transport, storage and communication (ISIC I) 
##                                                                                       49 
##                               Wholesale, retail trade, restaurants and hotels (ISIC G-H) 
##                                                                                       49 
##                                                        Gross National Income(GNI) in USD 
##                                                                                        0 
##                                                             Gross Domestic Product (GDP) 
##                                                                                        0

# Data completeness per year: number of rows (countries) and the percentage of
# missing cells among all non-grouping columns.
# `.cols` is given explicitly because calling across() with no column
# selection triggers a deprecation warning in newer dplyr releases.
# is.na() on the selected columns yields a logical matrix, so no unlist()
# coercion of mixed character/numeric columns is needed.
df %>%
  group_by(Year) %>%
  summarise(
    country_total = n(),
    missing_percent = mean(is.na(across(everything()))) * 100
  ) %>%
  kable() %>%
  kable_styling(full_width = FALSE) %>%
  scroll_box(width = "100%", height = "400px")

Year	country_total	missing_percent
1970	187	1.1976048
1971	187	1.1976048
1972	187	1.1548332
1973	187	1.1120616
1974	187	1.1548332
1975	187	1.1120616
1976	187	1.1334474
1977	187	1.1334474
1978	187	1.1334474
1979	187	1.0692900
1980	187	1.0692900
1981	187	1.0479042
1982	187	1.0265184
1983	187	1.0265184
1984	187	1.0051326
1985	187	0.9837468
1986	187	1.0051326
1987	187	0.9409752
1988	187	0.9409752
1989	188	0.9572431
1990	216	0.8887243
1991	211	0.8150114
1992	211	0.9097801
1993	211	0.8529189
1994	210	0.8379356
1995	210	0.8569796
1996	210	0.8379356
1997	210	0.8188916
1998	210	0.8950676
1999	210	0.8950676
2000	210	0.8760236
2001	210	0.8379356
2002	210	0.7617597
2003	210	0.7808037
2004	210	0.6855837
2005	212	0.7168459
2006	212	0.6979815
2007	212	0.7168459
2008	214	0.7101476
2009	214	0.7475238
2010	214	0.7475238
2011	213	0.7322569
2012	213	0.7510327
2013	212	0.7357102
2014	212	0.7545746
2015	212	0.7734390
2016	212	0.7923033
2017	212	0.8111677
2018	212	0.8488964
2019	212	0.8866252
2020	212	0.9432183
2021	212	1.0375401

# Count fully duplicated rows
df %>%
  duplicated() %>%
  sum()

## [1] 0

Data Cleaning

Filter Tahun 2021

Penelitian difokuskan pada tahun 2021 agar analisis merepresentasikan kondisi ekonomi global pada periode terbaru dalam dataset. Oleh karena itu, data difilter hanya untuk observasi tahun 2021.

# Keep only the observations for 2021, then report the resulting size.
df_2021 <- filter(df, Year == 2021)
dim(df_2021)

## [1] 212  26

Cek outlier Univariat

Outlier univariat dideteksi menggunakan metode boxplot berbasis IQR. Transformasi log (log1p) digunakan untuk mengurangi skewness pada data ekonomi yang biasanya memiliki rentang sangat besar.

# Keep only the economic indicators. Year and CountryID are numeric but are
# identifiers, not indicators; leaving CountryID in would make it "X1" and
# shift every symbol by one relative to the X1-X22 variable table.
df_2021_numeric <- df_2021 %>%
  select(where(is.numeric)) %>%
  select(-any_of(c("Year", "CountryID")))

# Short symbols (X1, X2, ...) keep the axis labels readable.
indicator_symbols <- paste0("X", seq_len(ncol(df_2021_numeric)))
colnames(df_2021_numeric) <- indicator_symbols

# Univariate boxplot (IQR method)
df_2021_numeric %>%
  # Signed log1p: plain log1p() returns NaN for values below -1, which would
  # silently drop negative observations from the plot.
  mutate(across(everything(), ~ sign(.x) * log1p(abs(.x)))) %>%
  pivot_longer(everything()) %>%
  # Fix the factor order so the axis reads X1, X2, ... instead of the
  # alphabetical X1, X10, X11, ...
  mutate(name = factor(name, levels = indicator_symbols)) %>%
  ggplot(aes(x = name, y = value)) +
  geom_boxplot(fill = "steelblue",
               outlier.color = "red",
               outlier.size = 2) +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 0)) +
  labs(title = "Deteksi Outlier Univariat (Boxplot - IQR)",
       x = "Indikator (X1–X22)",
       y = "Nilai (log scale)")

Visualisasi di atas membantu mengidentifikasi nilai ekstrem pada masing-masing indikator.

Imputasi Missing Value

Tiga metode imputasi dibandingkan:

# Median imputation: each missing value in a numeric column is replaced by
# that column's median (computed on the observed values).
impute_median <- function(col) {
  replace(col, is.na(col), median(col, na.rm = TRUE))
}
df_median <- mutate(df_2021, across(where(is.numeric), impute_median))

Mengganti nilai hilang dengan median variabel.

# Mean imputation: each missing value in a numeric column is replaced by
# that column's mean (computed on the observed values).
impute_mean <- function(col) {
  replace(col, is.na(col), mean(col, na.rm = TRUE))
}
df_mean <- mutate(df_2021, across(where(is.numeric), impute_mean))

Mengganti nilai hilang dengan rata-rata variabel.

# KNN imputation (k = 5) on the numeric columns.
# CountryID and Year are identifiers, so they are excluded from the
# neighbour distance via `dist_var`; otherwise an arbitrary ID code would
# influence which countries count as "nearest". The columns themselves are
# kept in the result so downstream code that drops them still works.
knn_input <- df_2021 %>%
  select(where(is.numeric))
knn_dist_vars <- setdiff(colnames(knn_input), c("CountryID", "Year"))

df_knn <- knn_input %>%
  kNN(k = 5, dist_var = knn_dist_vars) %>%
  # kNN() appends logical "<name>_imp" indicator columns; drop them.
  select(-ends_with("_imp"))

Menggunakan metode K-Nearest Neighbors (k = 5) untuk memperkirakan nilai hilang berdasarkan kedekatan observasi.

Perbandingan ini dilakukan untuk menentukan metode imputasi terbaik berdasarkan uji kelayakan data.

Drop Variabel Non-Indikator

Variabel non-numerik dan identitas negara dihapus karena tidak relevan dalam analisis PCA dan FA.

# Drop identifier / non-indicator columns from every imputed data set.
# The KNN set only contains numeric columns, but it still carries CountryID
# as well as Year; both must go so all three sets hold the same indicators.
# any_of() tolerates columns that are already absent from a given set.
id_cols <- c("Country", "Currency", "CountryID", "Year")

df_med_clean  <- df_median %>% select(-any_of(id_cols))
df_mean_clean <- df_mean   %>% select(-any_of(id_cols))
df_knn_clean  <- df_knn    %>% select(-any_of(id_cols))

Hapus Varians Nol

Variabel dengan varians nol dihapus karena tidak memberikan informasi dalam analisis multivariat.

# Remove columns that carry no variance.
#
# x: a data frame (or tibble).
# Returns x restricted to the numeric columns whose standard deviation
# (ignoring NA) is strictly positive. Columns whose standard deviation is
# NA (e.g. all values missing, or fewer than two observations) are dropped
# instead of raising an error.
clean_var <- function(x) {
  has_variance <- vapply(
    x,
    function(col) is.numeric(col) && isTRUE(sd(col, na.rm = TRUE) > 0),
    logical(1)
  )
  x[, has_variance, drop = FALSE]
}

# Apply the zero-variance filter to each imputed data set.
df_med_clean  <- df_med_clean  %>% clean_var()
df_mean_clean <- df_mean_clean %>% clean_var()
df_knn_clean  <- df_knn_clean  %>% clean_var()

Standardisasi

Data distandardisasi menggunakan fungsi scale() sehingga setiap variabel memiliki mean = 0 dan standar deviasi = 1. Standardisasi diperlukan karena PCA dan FA sensitif terhadap perbedaan skala variabel.

# Standardise every column, then convert the resulting matrix back to a
# data frame.
df_med_scaled  <- df_med_clean  %>% scale() %>% as.data.frame()
df_mean_scaled <- df_mean_clean %>% scale() %>% as.data.frame()
df_knn_scaled  <- df_knn_clean  %>% scale() %>% as.data.frame()

Data

Data penelitian terdiri dari 22 indikator ekonomi yang digunakan dalam analisis Principal Component Analysis (PCA) dan Factor Analysis (FA). Berikut adalah daftar variabel yang digunakan:

Simbol	Nama.Variabel
X1	AMA exchange rate
X2	IMF based exchange rate
X3	Population
X4	Per capita GNI
X5	Agriculture, hunting, forestry, fishing (ISIC A-B)
X6	Changes in inventories
X7	Construction (ISIC F)
X8	Exports of goods and services
X9	Final consumption expenditure
X10	General government final consumption expenditure
X11	Gross capital formation
X12	Gross fixed capital formation (including Acquisitions less disposals of valuables)
X13	Household consumption expenditure (including Non-profit institutions serving households)
X14	Imports of goods and services
X15	Manufacturing (ISIC D)
X16	Mining, Manufacturing, Utilities (ISIC C-E)
X17	Other Activities (ISIC J-P)
X18	Total Value Added
X19	Transport, storage and communication (ISIC I)
X20	Wholesale, retail trade, restaurants and hotels (ISIC G-H)
X21	Gross National Income (GNI) in USD
X22	Gross Domestic Product (GDP)

Cek outlier

Outlier multivariat dideteksi menggunakan Mahalanobis Distance. Cut-off ditentukan berdasarkan distribusi Chi-Square dengan derajat bebas sebesar jumlah variabel.

Observasi dengan nilai Mahalanobis melebihi cut-off dianggap sebagai outlier multivariat.

# Multivariate outlier check (Mahalanobis distance) on the median-imputed,
# standardised data.
center  <- colMeans(df_med_scaled)
cov_mat <- cov(df_med_scaled)

# Distance of every observation from the centroid.
mahal_dist <- mahalanobis(x = df_med_scaled, center = center, cov = cov_mat)

# Cut-off: 97.5% quantile of a chi-square distribution whose degrees of
# freedom equal the number of variables.
cutoff <- qchisq(p = 0.975, df = ncol(df_med_scaled))
outlier_index <- which(mahal_dist > cutoff)

# Number of flagged observations
length(outlier_index)

## [1] 33

# Row positions of the observations above the cut-off
print(outlier_index)

##  [1]   8   9  22  33  40  63  70  81  84  85  86  88  90  93  98 108 120 121 132
## [20] 142 152 159 167 172 174 176 179 185 188 194 202 205 209

# One vertical spike per observation; the red line marks the cut-off.
plot(
  mahal_dist,
  main = "Mahalanobis Distance Plot",
  xlab = "Observasi",
  ylab = "Mahalanobis Distance",
  type = "h"
)
abline(h = cutoff, lwd = 2, col = "red")

Berdasarkan deteksi outlier secara univariat dan multivariat, ditemukan beberapa observasi yang teridentifikasi sebagai outlier. Namun karena jumlahnya relatif besar dan masih merepresentasikan variasi alami antar negara, observasi tersebut tidak dihapus agar tidak menghilangkan informasi penting dalam analisis.

Uji Kelayakan Data

Kaiser–Meyer–Olkin (KMO)

KMO mengukur kecukupan sampel untuk analisis faktor.

Kriteria interpretasi: ≥ 0.90 → Sangat baik; 0.80–0.89 → Baik; 0.70–0.79 → Cukup; 0.60–0.69 → Kurang; < 0.60 → Tidak layak.

# KMO for each imputation variant.
kmo_med  <- KMO(df_med_scaled)
kmo_mean <- KMO(df_mean_scaled)
kmo_knn  <- KMO(df_knn_scaled)

# Side-by-side table of the overall MSA values.
kmo_compare <- data.frame(
  Metode = c("Median", "Mean", "KNN"),
  KMO = c(kmo_med[["MSA"]], kmo_mean[["MSA"]], kmo_knn[["MSA"]])
)

kmo_compare %>%
  kable() %>%
  kable_styling(full_width = FALSE)

Metode	KMO
Median	0.7436081
Mean	0.7529374
KNN	0.7392948

Meskipun nilai KMO metode mean sedikit lebih tinggi, imputasi median dipilih karena data ekonomi cenderung mengandung outlier. Mean sensitif terhadap nilai ekstrem, sedangkan median lebih robust dan lebih stabil dalam merepresentasikan kecenderungan sentral data. Oleh karena itu, median dinilai lebih sesuai untuk menjaga struktur asli data sebelum dilakukan PCA dan FA.

Bartlett

Bartlett menguji apakah matriks korelasi berbeda signifikan dari matriks identitas. Jika p-value < 0.05 → data layak untuk FA.

# Bartlett test for each imputation variant.
bart_med  <- cortest.bartlett(df_med_scaled)
bart_mean <- cortest.bartlett(df_mean_scaled)
bart_knn  <- cortest.bartlett(df_knn_scaled)

# Side-by-side table of the p-values.
bart_compare <- data.frame(
  Metode = c("Median", "Mean", "KNN"),
  p_value = c(
    bart_med[["p.value"]],
    bart_mean[["p.value"]],
    bart_knn[["p.value"]]
  )
)

bart_compare %>%
  kable() %>%
  kable_styling(full_width = FALSE)

Metode	p_value
Median	0
Mean	0
KNN	0

Korelasi

Visualisasi korelasi dilakukan untuk melihat pola hubungan antar indikator ekonomi.

Adanya korelasi tinggi menunjukkan kemungkinan adanya struktur faktor laten.

# Correlation matrix of the median-imputed, standardised indicators.
kor <- cor(df_med_scaled)

# Upper-triangle colour map with the coefficients printed in each cell.
corrplot(
  kor,
  type = "upper",
  method = "color",
  tl.cex = 0.8,
  number.cex = 0.7,
  addCoef.col = "black"
)

PCA (DATA FINAL MENGGUNAKAN MEDIAN)

Load PCA

PCA dilakukan menggunakan fungsi prcomp() pada data yang telah distandarisasi.

# PCA on the standardised data; no rescaling is requested here because the
# input has already been passed through scale().
pca_final <- prcomp(x = df_med_scaled, scale. = FALSE)
pca_final %>% summary()

## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5    PC6     PC7
## Standard deviation     4.0268 1.5081 1.28843 0.99357 0.62321 0.4965 0.37645
## Proportion of Variance 0.7371 0.1034 0.07546 0.04487 0.01765 0.0112 0.00644
## Cumulative Proportion  0.7371 0.8404 0.91590 0.96077 0.97843 0.9896 0.99607
##                            PC8    PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.22122 0.1244 0.09507 0.07229 0.04948 0.04821 0.03651
## Proportion of Variance 0.00222 0.0007 0.00041 0.00024 0.00011 0.00011 0.00006
## Cumulative Proportion  0.99830 0.9990 0.99941 0.99965 0.99976 0.99987 0.99993
##                           PC15    PC16    PC17     PC18     PC19      PC20
## Standard deviation     0.02956 0.02388 0.01222 0.005283 0.001145 0.0007479
## Proportion of Variance 0.00004 0.00003 0.00001 0.000000 0.000000 0.0000000
## Cumulative Proportion  0.99997 0.99999 1.00000 1.000000 1.000000 1.0000000
##                            PC21      PC22
## Standard deviation     0.000403 0.0002022
## Proportion of Variance 0.000000 0.0000000
## Cumulative Proportion  1.000000 1.0000000

Hitung Eigenvalue dari hasil prcomp

Eigenvalue dihitung dari kuadrat standar deviasi komponen utama. Eigenvalue menunjukkan jumlah variasi yang dijelaskan oleh setiap komponen.

# Eigenvalues = squared standard deviations of the principal components.
eigen_values <- pca_final[["sdev"]]^2

Kaiser Criterion

Komponen dipilih jika memiliki eigenvalue > 1.

# Number of components with an eigenvalue above 1
length(which(eigen_values > 1))

## [1] 3

Cumulative Variance

Menghitung proporsi dan kumulatif variasi yang dijelaskan oleh komponen utama.

# Share of total variance per component, then its running total.
total_variance <- sum(eigen_values)
prop_var <- eigen_values / total_variance
cum_var <- cumsum(prop_var)
print(cum_var)

##  [1] 0.7370582 0.8404448 0.9159012 0.9607731 0.9784273 0.9896310 0.9960727
##  [8] 0.9982971 0.9990003 0.9994111 0.9996487 0.9997600 0.9998656 0.9999262
## [15] 0.9999659 0.9999919 0.9999986 0.9999999 1.0000000 1.0000000 1.0000000
## [22] 1.0000000

Scree Plot

Scree plot digunakan untuk mengidentifikasi titik elbow sebagai alternatif penentuan jumlah komponen.

# Scree plot with value labels on the bars
pca_final %>% fviz_eig(addlabels = TRUE)

Contribution Variabel

Menunjukkan variabel mana yang paling berkontribusi pada PC1 dan PC2.

# Reference line: the contribution (in %) each variable would have if all
# variables contributed equally. It is derived from the number of rows of
# the rotation matrix (one row per variable) instead of a hard-coded 22, so
# it stays correct if the set of indicators changes.
expected_contrib <- 100 / nrow(pca_final$rotation)

# Contribution to PC1
fviz_contrib(
  pca_final,
  choice = "var",
  axes = 1,
  top = 15,
  fill = "steelblue"
) +
  geom_hline(yintercept = expected_contrib, linetype = 2)

# Contribution to PC2
fviz_contrib(
  pca_final,
  choice = "var",
  axes = 2,
  top = 15,
  fill = "steelblue"
) +
  geom_hline(yintercept = expected_contrib, linetype = 2)

Loading PCA

Loading menunjukkan korelasi antara variabel asli dan komponen utama.

# Variable plot coloured by contribution; repel keeps labels from overlapping.
fviz_pca_var(
  pca_final,
  repel = TRUE,
  col.var = "contrib",
  gradient.cols = c("blue", "orange", "red")
)

PCA MANUAL (MEDIAN)

# Manual PCA: eigen-decomposition of the covariance matrix of the
# standardised data.
cov_mat <- cov(df_med_scaled)
eig_manual <- eigen(cov_mat)

eigen_values_manual  <- eig_manual$values
eigen_vectors_manual <- eig_manual$vectors

# Proportion of variance per component and its running total.
prop_var_manual <- eigen_values_manual / sum(eigen_values_manual)
cum_var_manual  <- cumsum(prop_var_manual)

# seq_along() instead of 1:length() so an empty vector cannot yield c(1, 0).
pca_manual <- data.frame(
  Komponen = paste0("PC", seq_along(eigen_values_manual)),
  Eigenvalue = eigen_values_manual,
  Proporsi = prop_var_manual,
  Kumulatif = cum_var_manual
)

kable(pca_manual) %>% kable_styling(full_width = FALSE)

Komponen	Eigenvalue	Proporsi	Kumulatif
PC1	16.2152794	0.7370582	0.7370582
PC2	2.2745056	0.1033866	0.8404448
PC3	1.6600415	0.0754564	0.9159012
PC4	0.9871808	0.0448719	0.9607731
PC5	0.3883928	0.0176542	0.9784273
PC6	0.2464823	0.0112037	0.9896310
PC7	0.1417162	0.0064416	0.9960727
PC8	0.0489369	0.0022244	0.9982971
PC9	0.0154703	0.0007032	0.9990003
PC10	0.0090389	0.0004109	0.9994111
PC11	0.0052266	0.0002376	0.9996487
PC12	0.0024478	0.0001113	0.9997600
PC13	0.0023246	0.0001057	0.9998656
PC14	0.0013328	0.0000606	0.9999262
PC15	0.0008739	0.0000397	0.9999659
PC16	0.0005704	0.0000259	0.9999919
PC17	0.0001493	0.0000068	0.9999986
PC18	0.0000279	0.0000013	0.9999999
PC19	0.0000013	0.0000001	1.0000000
PC20	0.0000006	0.0000000	1.0000000
PC21	0.0000002	0.0000000	1.0000000
PC22	0.0000000	0.0000000	1.0000000

# Component scores: project the scaled observations onto the eigenvectors
# (one column of scores per component).
scores_pca_manual <-
  as.matrix(df_med_scaled) %*% eigen_vectors_manual
# Show the first six rows of the score matrix.
head(scores_pca_manual, n = 6L)

##           [,1]        [,2]       [,3]       [,4]          [,5]          [,6]
## [1,] 0.9004235  0.05877780 0.27018743 -0.6244589  0.0396896761 -0.0036307050
## [2,] 0.9314044  0.16200982 0.16020171 -0.4156180 -0.0922514766 -0.1307730500
## [3,] 0.5246835 -0.08441405 0.35949071 -0.4586007 -0.0639643027  0.0001875561
## [4,] 0.9266989  0.43298766 0.04928632  0.7332024  0.2180295981 -0.3256663295
## [5,] 0.7942831 -0.04994436 0.17697253 -0.5428798  0.0003539282 -0.0157782489
## [6,] 0.9614402  0.24339753 0.14050150 -0.1440502 -0.0350638940 -0.1870070519
##             [,7]         [,8]          [,9]        [,10]        [,11]
## [1,] -0.08364019  0.034466137 -0.0177028130 -0.043133450 -0.039212126
## [2,] -0.05205450  0.027330484  0.0080991108  0.026044955 -0.001634054
## [3,] -0.18120661 -0.085107606 -0.0175093865  0.041663003  0.038968272
## [4,] -0.09050471 -0.027470648  0.0021297420 -0.007725320 -0.013454682
## [5,] -0.03174048  0.003393824 -0.0006248975 -0.033945374  0.019844532
## [6,] -0.07421066  0.010253036  0.0042237660  0.007338755 -0.010164589
##             [,12]        [,13]        [,14]        [,15]        [,16]
## [1,]  0.024369993 -0.014146473 -0.001306507  0.024413201 -0.002352774
## [2,]  0.001386426  0.002547264  0.001514197 -0.011078662 -0.005025632
## [3,]  0.045179399 -0.003628634 -0.033445316  0.023108627  0.023788100
## [4,]  0.001958845  0.005176775  0.007850534 -0.004750845 -0.006181017
## [5,] -0.001966991 -0.036140211 -0.005267282  0.039935478 -0.012960562
## [6,]  0.005168492 -0.001429394  0.002656469 -0.003531774 -0.006977464
##              [,17]         [,18]         [,19]         [,20]         [,21]
## [1,]  0.0009698838 -0.0027126927  1.720017e-04  0.0002565293  2.278286e-04
## [2,] -0.0016964745  0.0021087536  2.037386e-04  0.0002072516 -8.101792e-05
## [3,]  0.0034269602  0.0004365102 -1.103219e-03 -0.0001453891 -8.365906e-05
## [4,]  0.0002028357  0.0011149791  2.219595e-04 -0.0001484701 -3.060593e-05
## [5,]  0.0039967225  0.0028070864 -5.339321e-05 -0.0002025450 -7.509037e-05
## [6,] -0.0002649622  0.0012519337  1.830709e-04  0.0002145693 -6.217044e-05
##              [,22]
## [1,]  3.466698e-05
## [2,] -3.285652e-05
## [3,]  1.615282e-04
## [4,] -9.593451e-05
## [5,]  1.881789e-05
## [6,] -5.504854e-05

FA (DATA FINAL MENGGUNAKAN MEDIAN)

Tentukan Jumlah Faktor

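Jumlah faktor ditentukan menggunakan Parallel Analysis. Metode ini membandingkan eigenvalue dari data aktual dengan eigenvalue yang diperoleh dari data acak berukuran sama; faktor dipertahankan selama eigenvalue aktualnya lebih besar daripada eigenvalue data acak.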

# Parallel analysis on the scaled data, factor solution only (fa = "fa"),
# with the diagnostic plot drawn.
# NOTE(review): parallel analysis is simulation-based, so the suggestion may
# depend on the RNG state — confirm a seed is set earlier in the document.
fa_par <- fa.parallel(
  df_med_scaled,
  fa = "fa",
  plot = TRUE
)

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA

# Number of factors suggested by the parallel analysis.
k <- fa_par[["nfact"]]

FA (MEDIAN)

FA dilakukan menggunakan metode Principal Axis Factoring (PAF) dengan rotasi varimax. Rotasi varimax digunakan untuk mempermudah interpretasi faktor.

# Factor analysis with k factors: principal-axis extraction (fm = "pa")
# followed by a varimax rotation.
fa_result <- fa(
  df_med_scaled,
  nfactors = k,
  fm = "pa",
  rotate = "varimax"
)

# Print the loading matrix, hiding entries below 0.4 in absolute value.
print(fa_result[["loadings"]], cutoff = 0.4)

## 
## Loadings:
##     PA1    PA3    PA2   
## X1                 0.979
## X2                 0.922
## X3          0.783       
## X4                      
## X5          0.956       
## X6          0.837       
## X7   0.733  0.677       
## X8   0.678  0.609       
## X9   0.949              
## X10  0.865  0.485       
## X11  0.682  0.722       
## X12  0.691  0.712       
## X13  0.958              
## X14  0.795  0.507       
## X15  0.615  0.769       
## X16  0.644  0.752       
## X17  0.962              
## X18  0.885  0.463       
## X19  0.939              
## X20  0.935              
## X21  0.889  0.458       
## X22  0.884  0.466       
## 
##                   PA1   PA3   PA2
## SS loadings    11.152 6.845 1.896
## Proportion Var  0.507 0.311 0.086
## Cumulative Var  0.507 0.818 0.904

FA MANUAL

# Re-run the parallel analysis for the manual section, this time without
# drawing the plot.
fa_par <- fa.parallel(
  df_med_scaled,
  fa = "fa",
  plot = FALSE
)

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA

# Number of factors suggested by the parallel analysis.
k <- fa_par$nfact

# Manual principal-factor extraction.
# NOTE(review): this is a single, non-iterated and unrotated extraction, so
# its loadings are not expected to match the varimax-rotated fa() result
# above — confirm that this is the intended comparison.

# Correlation matrix of the scaled variables
R <- cor(df_med_scaled)

# Initial communalities: squared multiple correlation (SMC) of each variable
# with all the others, obtained from the diagonal of the inverse of R
invR <- solve(R)
smc <- 1 - 1 / diag(invR)

# Reduced correlation matrix: R with the communalities on the diagonal
R_reduced <- R
diag(R_reduced) <- smc

# Eigen decomposition of the reduced matrix
eig_fa <- eigen(R_reduced)

# Keep the first k eigenpairs. seq_len() and drop = FALSE keep V_fa a matrix
# even when k == 1; with `[, 1:k]` it would collapse to a plain vector and the
# column-wise sweep() below would fail.
factor_idx <- seq_len(k)
lambda_fa <- eig_fa$values[factor_idx]
V_fa <- eig_fa$vectors[, factor_idx, drop = FALSE]

# Factor loadings: each eigenvector scaled by the square root of its eigenvalue
loadings_fa_manual <- sweep(V_fa, 2, sqrt(lambda_fa), "*")
# Label the rows with the variable names so loadings (and psi) are readable
rownames(loadings_fa_manual) <- colnames(R)

# Unique variance: 1 minus the communality (row sum of squared loadings)
psi <- 1 - rowSums(loadings_fa_manual^2)

Implementasi Principal Component Analysis (PCA) dan Factor Analysis (FA) pada Dataset Global Economy Indicators Tahun 2021

Ruthtatia Grace Astridia (24031554072)

Cintiya Agustin Nareswari (24031554218)

2026-03-05