Import Data

# Load the WEO extract. stringsAsFactors = FALSE keeps text columns as
# character vectors instead of factors.
data <- read.csv2(
  file = "WEO_Data.csv",
  stringsAsFactors = FALSE
)

# Compact overview of the column types and the first few values.
str(data)
## 'data.frame':    1775 obs. of  7 variables:
##  $ Country                      : chr  "Albania" "Albania" "Albania" "Albania" ...
##  $ Subject.Descriptor           : chr  "Gross domestic product, constant prices" "Gross domestic product per capita, current prices" "Total investment" "Gross national savings" ...
##  $ Units                        : chr  "Percent change" "U.S. dollars" "Percent of GDP" "Percent of GDP" ...
##  $ Scale                        : chr  "" "Units" "" "" ...
##  $ Country.Series.specific.Notes: chr  "See notes for:  Gross domestic product, constant prices (National currency)." "See notes for:  Gross domestic product, current prices (National currency) Population (Persons)." "Source: IMF Staff Estimates. Official national accounts, including real GDP growth, rates available from 1996 o"| __truncated__ "Source: IMF Staff Estimates. Official national accounts, including real GDP growth, rates available from 1996 o"| __truncated__ ...
##  $ X2022                        : chr  "4.000" "6,369.009" "28.492" "14.597" ...
##  $ Estimates.Start.After        : int  2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
# Preview the first rows of the imported data (head() shows six by default).
head(data)
##   Country                                Subject.Descriptor
## 1 Albania           Gross domestic product, constant prices
## 2 Albania Gross domestic product per capita, current prices
## 3 Albania                                  Total investment
## 4 Albania                            Gross national savings
## 5 Albania                Inflation, average consumer prices
## 6 Albania                                 Unemployment rate
##                          Units Scale
## 1               Percent change      
## 2                 U.S. dollars Units
## 3               Percent of GDP      
## 4               Percent of GDP      
## 5               Percent change      
## 6 Percent of total labor force      
##                                                                                                                                                                                                                                                                                                                                                                                                                               Country.Series.specific.Notes
## 1                                                                                                                                                                                                                                                                                                                                                                              See notes for:  Gross domestic product, constant prices (National currency).
## 2                                                                                                                                                                                                                                                                                                                                                          See notes for:  Gross domestic product, current prices (National currency) Population (Persons).
## 3 Source: IMF Staff Estimates. Official national accounts, including real GDP growth, rates available from 1996 onwards. Latest actual data: 2020 National accounts manual used: European System of Accounts (ESA) 2010 GDP valuation: Market prices Reporting in calendar year: Yes Start/end months of reporting year: January/December Base year: 1996 Chain-weighted: Yes, from 1996 Primary domestic currency: Albanian lek Data last updated: 09/2022
## 4 Source: IMF Staff Estimates. Official national accounts, including real GDP growth, rates available from 1996 onwards. Latest actual data: 2020 National accounts manual used: European System of Accounts (ESA) 2010 GDP valuation: Market prices Reporting in calendar year: Yes Start/end months of reporting year: January/December Base year: 1996 Chain-weighted: Yes, from 1996 Primary domestic currency: Albanian lek Data last updated: 09/2022
## 5                                                                                                                                                                                                                                                                                                                                                                                               See notes for:  Inflation, average consumer prices (Index).
## 6                                                                                                                                                                                                                                                                                       Source: National Statistics Office Latest actual data: 2020 Employment type: National definition Primary domestic currency: Albanian lek Data last updated: 09/2022
##       X2022 Estimates.Start.After
## 1     4.000                  2020
## 2 6,369.009                  2020
## 3    28.492                  2020
## 4    14.597                  2020
## 5     6.243                  2020
## 6    10.300                  2020
# List the column names of the imported data frame.
colnames(data)
## [1] "Country"                       "Subject.Descriptor"           
## [3] "Units"                         "Scale"                        
## [5] "Country.Series.specific.Notes" "X2022"                        
## [7] "Estimates.Start.After"

Memilih Variabel Penelitian

# Indicators (values of Subject.Descriptor) retained for the analysis.
target_vars <- c(
  "Gross domestic product, constant prices",
  "Gross domestic product per capita, current prices",
  "Total investment",
  "Gross national savings",
  "Inflation, average consumer prices",
  "Unemployment rate",
  "General government revenue",
  "General government net lending/borrowing",
  "General government gross debt",
  "Current account balance"
)

# Keep only the rows whose indicator is one of the selected variables.
data_subset <- data[is.element(data$Subject.Descriptor, target_vars), ]

Mengubah Data Long menjadi Wide

# Reshape from long to wide: one row per Country, one column per indicator
# (Subject.Descriptor), filled with the X2022 value.
data_wide <- pivot_wider(
  data_subset,
  id_cols = Country,
  names_from = Subject.Descriptor,
  values_from = X2022
)

Menyiapkan Data Numerik

# Drop the first column (Country) and convert every remaining column to
# numeric. The values are text that may contain "," separators
# (e.g. "6,369.009"), so the commas are stripped before conversion.
data_numeric <- as.data.frame(
  lapply(
    data_wide[, -1],
    function(col) as.numeric(gsub(",", "", col, fixed = TRUE))
  )
)

# Working copy of the raw numeric indicators used by the following steps.
df_raw <- data_numeric

Cek Missing Value

# Number of missing values in each indicator column.
colSums(is.na(df_raw))
##           Gross.domestic.product..constant.prices 
##                                                 0 
## Gross.domestic.product.per.capita..current.prices 
##                                                 1 
##                                  Total.investment 
##                                                23 
##                            Gross.national.savings 
##                                                26 
##                Inflation..average.consumer.prices 
##                                                 0 
##                                 Unemployment.rate 
##                                                92 
##                        General.government.revenue 
##                                                 3 
##          General.government.net.lending.borrowing 
##                                                 3 
##                     General.government.gross.debt 
##                                                 7 
##                           Current.account.balance 
##                                                 2

Analisis Skewness

# Skewness of each raw indicator, ignoring missing values. vapply() iterates
# over the columns directly instead of coercing the data frame to a matrix
# (as apply() does), and it enforces exactly one numeric value per column.
vapply(df_raw, e1071::skewness, numeric(1), na.rm = TRUE)
##           Gross.domestic.product..constant.prices 
##                                         1.6883843 
## Gross.domestic.product.per.capita..current.prices 
##                                         2.1253248 
##                                  Total.investment 
##                                         1.7190098 
##                            Gross.national.savings 
##                                         0.7808055 
##                Inflation..average.consumer.prices 
##                                         7.1693992 
##                                 Unemployment.rate 
##                                         2.7088947 
##                        General.government.revenue 
##                                         3.4275177 
##          General.government.net.lending.borrowing 
##                                        -0.8492560 
##                     General.government.gross.debt 
##                                         1.5805015 
##                           Current.account.balance 
##                                         0.1369176

Imputasi Missing Value

# Median imputation: every missing value is replaced by the median of the
# observed values in the same column.
df_imputed <- df_raw

# seq_along() is safe when the data frame has no columns (1:ncol() would
# iterate over c(1, 0)); [[ ]] extracts each column as a plain vector.
for (i in seq_along(df_imputed)) {
  col_median <- median(df_imputed[[i]], na.rm = TRUE)
  df_imputed[[i]][is.na(df_imputed[[i]])] <- col_median
}

# Sanity check: total number of missing values left after imputation.
sum(is.na(df_imputed))
## [1] 0

Analisis Outlier

# Boxplots of all indicators before outlier treatment. las = 2 draws the axis
# labels perpendicular to the axis.
boxplot(
  df_imputed,
  main = "Boxplot Sebelum Penanganan Outlier",
  col = "orange",
  las = 2
)

Penanganan Outlier (Winsorizing)

#' Winsorize IQR outliers in a numeric vector
#'
#' Values below Q1 - iqr_mult * IQR are replaced by the lower cap percentile
#' and values above Q3 + iqr_mult * IQR by the upper cap percentile. Missing
#' values are ignored when the quantiles are computed and are returned
#' unchanged (previously any NA made quantile() stop with an error).
#'
#' @param x A numeric vector.
#' @param iqr_mult Multiplier of the IQR that defines the outlier fences.
#' @param cap_probs Length-two vector with the lower and upper probabilities
#'   whose quantiles replace low and high outliers respectively.
#' @return A numeric vector of the same length as `x`.
handle_outliers <- function(x, iqr_mult = 1.5, cap_probs = c(0.05, 0.95)) {
  stopifnot(length(cap_probs) == 2)

  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  caps <- quantile(x, probs = cap_probs, na.rm = TRUE, names = FALSE)

  # Same quantity as IQR(x, na.rm = TRUE): distance between the quartiles.
  fence <- iqr_mult * (quartiles[2] - quartiles[1])

  # which() drops NA comparisons, so missing entries are left untouched.
  x[which(x < quartiles[1] - fence)] <- caps[1]
  x[which(x > quartiles[2] + fence)] <- caps[2]

  x
}

# Apply the outlier treatment to every indicator column.
df_clean <- as.data.frame(
  lapply(df_imputed, handle_outliers)
)

# Boxplots of the same indicators after outlier treatment, for comparison
# with the earlier plot.
boxplot(
  df_clean,
  main = "Boxplot Setelah Penanganan Outlier",
  col = "lightgreen",
  las = 2
)

Statistik Deskriptif

# Report the number of observations (rows) in the cleaned data.
cat("Jumlah Observasi:",nrow(df_clean))
## Jumlah Observasi: 193
# Report the number of variables (columns) in the cleaned data.
cat("Jumlah Variabel:",ncol(df_clean))
## Jumlah Variabel: 10
# Descriptive statistics for every cleaned indicator.
stats_desc <- describe(df_clean)

# Render the statistics as a table rounded to two digits; full_width = FALSE
# keeps the table from stretching across the page.
kable_styling(
  kable(stats_desc, digits = 2, caption = "Statistik Deskriptif"),
  full_width = FALSE
)
Statistik Deskriptif
vars n mean sd median trimmed mad min max range skew kurtosis se
Gross.domestic.product..constant.prices 1 193 3.70 2.42 3.60 3.70 2.14 -1.50 9.27 10.77 0.04 0.06 0.17
Gross.domestic.product.per.capita..current.prices 2 193 15565.25 18535.94 6717.39 11980.18 8199.73 292.62 60361.78 60069.16 1.39 0.68 1334.25
Total.investment 3 193 24.62 6.78 24.11 24.15 5.32 10.46 40.16 29.70 0.62 0.21 0.49
Gross.national.savings 4 193 21.76 9.27 21.14 21.37 8.21 1.79 43.02 41.23 0.34 -0.11 0.67
Inflation..average.consumer.prices 5 193 9.95 7.24 7.90 8.50 4.02 1.90 30.54 28.64 1.81 2.66 0.52
Unemployment.rate 6 193 6.91 3.35 6.11 6.70 0.32 3.00 12.54 9.54 0.74 -0.72 0.24
General.government.revenue 7 193 28.90 12.04 27.14 28.35 13.21 7.02 61.56 54.54 0.37 -0.68 0.87
General.government.net.lending.borrowing 8 193 -3.29 3.66 -3.83 -3.55 2.73 -9.98 5.49 15.48 0.71 0.39 0.26
General.government.gross.debt 9 193 60.45 30.20 56.27 58.54 27.83 0.00 133.95 133.95 0.51 -0.11 2.17
Current.account.balance 10 193 -2.70 8.43 -3.45 -3.18 6.78 -20.31 16.11 36.42 0.46 0.00 0.61

Standarisasi Data

# Standardize every column with scale() (centering and scaling by default) and
# convert the resulting matrix back to a data frame.
# NOTE(review): the later steps visible in this file use df_clean, not
# df_scaled - confirm whether df_scaled is needed.
df_scaled <- as.data.frame(scale(df_clean))

Matriks Korelasi

# Pairwise correlation matrix of the cleaned indicators (cor() default method).
cor_matrix <- cor(df_clean)

# Heat-map of the upper triangle with the coefficients printed in each cell;
# the diagonal is omitted and the text labels are rotated 45 degrees.
corrplot(cor_matrix,
         method = "color",
         type = "upper",
         addCoef.col = "black",
         tl.col = "black",
         tl.srt = 45,
         number.cex = 0.7,
         tl.cex = 0.7,
         diag = FALSE,
         mar = c(0,0,2,0),
         title = "Matriks Korelasi Indikator Makroekonomi")

Uji Kelayakan (KMO dan Bartlett)

# Kaiser-Meyer-Olkin measure of sampling adequacy (overall and per item).
res_kmo <- KMO(df_clean)

# Print the KMO result.
res_kmo
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = df_clean)
## Overall MSA =  0.5
## MSA for each item = 
##           Gross.domestic.product..constant.prices 
##                                              0.39 
## Gross.domestic.product.per.capita..current.prices 
##                                              0.72 
##                                  Total.investment 
##                                              0.21 
##                            Gross.national.savings 
##                                              0.44 
##                Inflation..average.consumer.prices 
##                                              0.56 
##                                 Unemployment.rate 
##                                              0.63 
##                        General.government.revenue 
##                                              0.61 
##          General.government.net.lending.borrowing 
##                                              0.77 
##                     General.government.gross.debt 
##                                              0.62 
##                           Current.account.balance 
##                                              0.49
# Bartlett's test on the correlation matrix computed earlier (cor_matrix is
# cor(df_clean)), using the number of rows of df_clean as the sample size.
cortest.bartlett(
  cor_matrix,
  n = nrow(df_clean)
)
## $chisq
## [1] 520.582
## 
## $p.value
## [1] 3.545043e-82
## 
## $df
## [1] 45

PCA

# Principal components analysis extracting as many components as there are
# variables, without rotation.
pca_obj <- principal(df_clean,
nfactors=ncol(df_clean),
rotate="none")

# Print loadings, communalities and explained variance.
pca_obj
## Principal Components Analysis
## Call: principal(r = df_clean, nfactors = ncol(df_clean), rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                                                     PC1   PC2   PC3   PC4   PC5
## Gross.domestic.product..constant.prices           -0.01  0.46  0.36 -0.67  0.16
## Gross.domestic.product.per.capita..current.prices  0.65  0.08  0.42  0.30 -0.07
## Total.investment                                   0.03  0.79 -0.49  0.23  0.06
## Gross.national.savings                             0.72  0.41 -0.43  0.00  0.13
## Inflation..average.consumer.prices                -0.31 -0.38 -0.48  0.28  0.26
## Unemployment.rate                                 -0.34 -0.01  0.08 -0.02  0.86
## General.government.revenue                         0.48  0.03  0.39  0.54  0.19
## General.government.net.lending.borrowing           0.69 -0.29 -0.04 -0.18  0.34
## General.government.gross.debt                     -0.40  0.34  0.43  0.30  0.13
## Current.account.balance                            0.81 -0.21 -0.07 -0.17  0.06
##                                                     PC6   PC7   PC8   PC9  PC10
## Gross.domestic.product..constant.prices            0.16  0.39 -0.09 -0.07  0.01
## Gross.domestic.product.per.capita..current.prices  0.23  0.00 -0.38  0.33  0.00
## Total.investment                                  -0.17  0.06  0.01  0.13  0.16
## Gross.national.savings                             0.20 -0.11  0.00 -0.12 -0.22
## Inflation..average.consumer.prices                 0.46  0.41 -0.11 -0.01  0.01
## Unemployment.rate                                 -0.14 -0.27 -0.20  0.00  0.00
## General.government.revenue                        -0.34  0.29  0.06 -0.31  0.00
## General.government.net.lending.borrowing          -0.13  0.14  0.37  0.34  0.00
## General.government.gross.debt                      0.53 -0.12  0.36  0.02  0.00
## Current.account.balance                            0.34 -0.21  0.00 -0.25  0.19
##                                                   h2       u2 com
## Gross.domestic.product..constant.prices            1  4.4e-16 3.5
## Gross.domestic.product.per.capita..current.prices  1  3.3e-16 4.0
## Total.investment                                   1 -1.8e-15 2.2
## Gross.national.savings                             1  7.8e-16 3.0
## Inflation..average.consumer.prices                 1  1.2e-15 6.1
## Unemployment.rate                                  1  0.0e+00 1.7
## General.government.revenue                         1  7.8e-16 5.3
## General.government.net.lending.borrowing           1  4.4e-16 3.6
## General.government.gross.debt                      1 -2.2e-16 5.6
## Current.account.balance                            1  1.1e-16 2.2
## 
##                        PC1  PC2  PC3  PC4  PC5  PC6  PC7  PC8  PC9 PC10
## SS loadings           2.68 1.40 1.30 1.11 1.04 0.90 0.57 0.48 0.42 0.11
## Proportion Var        0.27 0.14 0.13 0.11 0.10 0.09 0.06 0.05 0.04 0.01
## Cumulative Var        0.27 0.41 0.54 0.65 0.75 0.84 0.90 0.95 0.99 1.00
## Proportion Explained  0.27 0.14 0.13 0.11 0.10 0.09 0.06 0.05 0.04 0.01
## Cumulative Proportion 0.27 0.41 0.54 0.65 0.75 0.84 0.90 0.95 0.99 1.00
## 
## Mean item complexity =  3.7
## Test of the hypothesis that 10 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0 
##  with the empirical chi square  0  with prob <  NA 
## 
## Fit based upon off diagonal values = 1

Variance PCA

# Eigenvalues stored in the PCA result.
eigenvalues <- pca_obj[["values"]]

# Share of the eigenvalue total carried by each component, and its running sum.
prop_variance <- prop.table(eigenvalues)
cum_variance <- cumsum(prop_variance)

# Summary table: eigenvalue, proportion and cumulative proportion.
data.frame(
  Eigenvalue = eigenvalues,
  Proporsi = prop_variance,
  Kumulatif = cum_variance
)
##    Eigenvalue   Proporsi Kumulatif
## 1   2.6799173 0.26799173 0.2679917
## 2   1.3967176 0.13967176 0.4076635
## 3   1.3048313 0.13048313 0.5381466
## 4   1.1117006 0.11117006 0.6493167
## 5   1.0359905 0.10359905 0.7529157
## 6   0.9048374 0.09048374 0.8433995
## 7   0.5652881 0.05652881 0.8999283
## 8   0.4751623 0.04751623 0.9474445
## 9   0.4153766 0.04153766 0.9889822
## 10  0.1101783 0.01101783 1.0000000

Scree Plot

# Scree plot: eigenvalues in component order, drawn as connected points.
plot(
  eigenvalues,
  type = "b",
  pch = 19,
  main = "Scree Plot PCA",
  xlab = "Komponen",
  ylab = "Eigenvalue"
)

# Dashed red reference line at eigenvalue = 1.
abline(h = 1, col = "red", lty = 2)

Loading PCA

# Strip the "loadings" class so the PCA loadings become a plain data frame.
loadings_matrix <- as.data.frame(unclass(pca_obj$loadings))

# Table of the loadings on the first four components, rounded to 3 decimals.
kable_styling(
  kable(
    round(loadings_matrix[, 1:4], 3),
    caption = "Component Loading PCA"
  ),
  full_width = FALSE
)
Component Loading PCA
PC1 PC2 PC3 PC4
Gross.domestic.product..constant.prices -0.005 0.455 0.357 -0.671
Gross.domestic.product.per.capita..current.prices 0.647 0.082 0.421 0.301
Total.investment 0.027 0.793 -0.490 0.225
Gross.national.savings 0.722 0.409 -0.426 -0.001
Inflation..average.consumer.prices -0.308 -0.380 -0.480 0.277
Unemployment.rate -0.341 -0.013 0.080 -0.025
General.government.revenue 0.482 0.031 0.386 0.537
General.government.net.lending.borrowing 0.689 -0.291 -0.044 -0.180
General.government.gross.debt -0.401 0.337 0.432 0.305
Current.account.balance 0.813 -0.207 -0.066 -0.172

Parallel Analysis

# Parallel analysis on the cleaned data. With fa = "fa" only a factor
# suggestion is reported (the printed number of components is NA).
fa.parallel(df_clean,
fa="fa",
main="Parallel Analysis")

## Parallel analysis suggests that the number of factors =  6  and the number of components =  NA

Menentukan Jumlah Faktor

# Number of factors to retain = count of PCA eigenvalues greater than 1.
# NOTE(review): the parallel analysis above suggested 6 factors while this
# rule yields 5 - confirm which criterion is intended.
n_factors_final <- sum(eigenvalues>1)

# Print the chosen number of factors.
n_factors_final
## [1] 5

Factor Analysis

# Factor analysis with n_factors_final factors, minimum-residual ("minres")
# factoring and varimax rotation.
fa_res <- fa(df_clean,
nfactors=n_factors_final,
rotate="varimax",
fm="minres")

# Print loadings, communalities, variance accounted for and fit statistics.
fa_res
## Factor Analysis using method =  minres
## Call: fa(r = df_clean, nfactors = n_factors_final, rotate = "varimax", 
##     fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                                                     MR1   MR2   MR3   MR4   MR5
## Gross.domestic.product..constant.prices           -0.01  0.01  0.99  0.08 -0.03
## Gross.domestic.product.per.capita..current.prices  0.36 -0.04  0.03  0.09  0.62
## Total.investment                                  -0.06  1.00  0.02  0.02  0.01
## Gross.national.savings                             0.75  0.51  0.05 -0.05  0.11
## Inflation..average.consumer.prices                -0.04 -0.04 -0.26  0.07 -0.34
## Unemployment.rate                                 -0.19 -0.03  0.03  0.09 -0.13
## General.government.revenue                         0.11  0.02 -0.08 -0.02  0.63
## General.government.net.lending.borrowing           0.47 -0.09  0.02 -0.25  0.25
## General.government.gross.debt                     -0.22  0.00  0.05  0.97  0.02
## Current.account.balance                            0.93 -0.17  0.02 -0.08  0.15
##                                                      h2     u2 com
## Gross.domestic.product..constant.prices           0.998 0.0024 1.0
## Gross.domestic.product.per.capita..current.prices 0.525 0.4751 1.7
## Total.investment                                  0.995 0.0047 1.0
## Gross.national.savings                            0.842 0.1582 1.9
## Inflation..average.consumer.prices                0.189 0.8106 2.1
## Unemployment.rate                                 0.062 0.9383 2.5
## General.government.revenue                        0.421 0.5788 1.1
## General.government.net.lending.borrowing          0.351 0.6493 2.2
## General.government.gross.debt                     0.999 0.0014 1.1
## Current.account.balance                           0.912 0.0876 1.1
## 
##                        MR1  MR2  MR3  MR4  MR5
## SS loadings           1.86 1.30 1.07 1.05 1.02
## Proportion Var        0.19 0.13 0.11 0.11 0.10
## Cumulative Var        0.19 0.32 0.42 0.53 0.63
## Proportion Explained  0.30 0.21 0.17 0.17 0.16
## Cumulative Proportion 0.30 0.50 0.67 0.84 1.00
## 
## Mean item complexity =  1.6
## Test of the hypothesis that 5 factors are sufficient.
## 
## df null model =  45  with the objective function =  2.77 with Chi Square =  520.58
## df of  the model are 5  and the objective function was  0.09 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.09 
## 
## The harmonic n.obs is  193 with the empirical chi square  8.18  with prob <  0.15 
## The total n.obs was  193  with Likelihood Chi Square =  17.34  with prob <  0.0039 
## 
## Tucker Lewis Index of factoring reliability =  0.762
## RMSEA index =  0.113  and the 90 % confidence intervals are  0.058 0.174
## BIC =  -8.98
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy             
##                                                    MR1  MR2  MR3  MR4  MR5
## Correlation of (regression) scores with factors   0.96 1.00 1.00 1.00 0.78
## Multiple R square of scores with factors          0.92 0.99 1.00 0.99 0.61
## Minimum correlation of possible factor scores     0.83 0.99 0.99 0.99 0.21

Factor Loading

# Show the rotated loadings, blanking out those smaller than 0.4 in absolute
# value.
print(fa_res$loadings,cutoff=0.4)
## 
## Loadings:
##                                                   MR1    MR2    MR3    MR4   
## Gross.domestic.product..constant.prices                          0.995       
## Gross.domestic.product.per.capita..current.prices                            
## Total.investment                                          0.995              
## Gross.national.savings                             0.747  0.515              
## Inflation..average.consumer.prices                                           
## Unemployment.rate                                                            
## General.government.revenue                                                   
## General.government.net.lending.borrowing           0.466                     
## General.government.gross.debt                                           0.974
## Current.account.balance                            0.926                     
##                                                   MR5   
## Gross.domestic.product..constant.prices                 
## Gross.domestic.product.per.capita..current.prices  0.622
## Total.investment                                        
## Gross.national.savings                                  
## Inflation..average.consumer.prices                      
## Unemployment.rate                                       
## General.government.revenue                         0.634
## General.government.net.lending.borrowing                
## General.government.gross.debt                           
## Current.account.balance                                 
## 
##                  MR1   MR2   MR3   MR4   MR5
## SS loadings    1.858 1.297 1.073 1.050 1.015
## Proportion Var 0.186 0.130 0.107 0.105 0.102
## Cumulative Var 0.186 0.316 0.423 0.528 0.629

Communality

# Communality of each variable from the fitted factor model.
fa_res$communality
##           Gross.domestic.product..constant.prices 
##                                        0.99755261 
## Gross.domestic.product.per.capita..current.prices 
##                                        0.52485040 
##                                  Total.investment 
##                                        0.99529272 
##                            Gross.national.savings 
##                                        0.84177533 
##                Inflation..average.consumer.prices 
##                                        0.18944675 
##                                 Unemployment.rate 
##                                        0.06172196 
##                        General.government.revenue 
##                                        0.42115909 
##          General.government.net.lending.borrowing 
##                                        0.35065181 
##                     General.government.gross.debt 
##                                        0.99861231 
##                           Current.account.balance 
##                                        0.91239570

Factor Scores

# Factor scores for every row of df_clean based on the fitted factor model.
factor_scores <- factor.scores(df_clean,fa_res)

# Preview the first rows of the score matrix (one column per factor).
head(factor_scores$scores)
##             MR1        MR2         MR3        MR4        MR5
## [1,] -0.8290228  0.4916485  0.06019258  0.1009657 -0.0744428
## [2,]  1.6226893  1.8466496  0.36666480  0.3757603 -0.9354594
## [3,]  1.3980180 -0.0621761  1.32310813 -0.3486771  1.0525576
## [4,]  2.0118563 -0.1592663 -0.40036791  0.3790579 -1.5460635
## [5,] -1.7059551  2.1968849  0.87319467  0.6715522  0.3121221
## [6,]  0.3817650 -0.5495017  0.06015940  0.5622553 -0.6799843

Diagram Struktur Faktor

# Draw the factor structure diagram for the fitted factor model.
fa.diagram(fa_res,
main="Diagram Struktur Faktor")