#install.packages("readxl")
#install.packages("pastecs")
#install.packages("TSstudio")

library(readxl)
## Warning: package 'readxl' was built under R version 4.0.5
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.2     v dplyr   1.0.7
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(pastecs)
## Warning: package 'pastecs' was built under R version 4.0.5
## 
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## The following object is masked from 'package:tidyr':
## 
##     extract
library(TSstudio)
## Warning: package 'TSstudio' was built under R version 4.0.5
library(xts)
## Warning: package 'xts' was built under R version 4.0.5
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.0.5
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:pastecs':
## 
##     first, last
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library(MASS)
## Warning: package 'MASS' was built under R version 4.0.5
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(GGally)
## Warning: package 'GGally' was built under R version 4.0.5
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
#read excel file second worksheet
composite_index <- read_excel("Composite Index -1.xlsx", sheet=2)
## New names:
## * `` -> ...1
#create time series index from composit_index dataframe
composite_index_xts <- xts(composite_index[-1], composite_index[[1]])

# assuming you have a 1 column dataframe called "df"
cov_composite_index <- cov(composite_index_xts)

#plot the summary result for internet, mobile, credit, debit, emoney
summary_table <- stat.desc(cov_composite_index)

summary_table
##                   Internet        Mobile        Credit         Debit
## nbr.val       5.000000e+00  5.000000e+00  5.000000e+00  5.000000e+00
## nbr.null      0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00
## nbr.na        0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00
## min          -7.545116e+02 -6.550128e+05 -4.995375e+06 -2.078418e+05
## max           8.213714e+04  7.241868e+07  5.151259e+04  2.154378e+07
## range         8.289166e+04  7.307369e+07  5.046888e+06  2.175162e+07
## sum           9.557770e+04  8.438149e+07 -5.807472e+06  2.538406e+07
## median        3.234270e+03  2.760510e+06 -2.078418e+05  1.284374e+06
## mean          1.911554e+04  1.687630e+07 -1.161494e+06  5.076812e+06
## SE.mean       1.589086e+04  1.400967e+07  9.665227e+05  4.150769e+06
## CI.mean.0.95  4.412009e+04  3.889709e+07  2.683497e+06  1.152438e+07
## var           1.262597e+09  9.813546e+14  4.670830e+12  8.614443e+13
## std.dev       3.553304e+04  3.132658e+07  2.161210e+06  9.281402e+06
## coef.var      1.858856e+00  1.856247e+00 -1.860715e+00  1.828195e+00
##                     Emoney
## nbr.val       5.000000e+00
## nbr.null      0.000000e+00
## nbr.na        0.000000e+00
## min          -4.995375e+06
## max           5.467837e+08
## range         5.517791e+08
## sum           6.358329e+08
## median        2.154378e+07
## mean          1.271666e+08
## SE.mean       1.057944e+08
## CI.mean.0.95  2.937325e+08
## var           5.596232e+16
## std.dev       2.365636e+08
## coef.var      1.860265e+00
#display stad dev with specific column
sd(composite_index$Internet)
## [1] 3.532211
#create the variance covariance matrix for variables (internet, mobile, credit, debit, emoney)
sigma_first <- cov_composite_index

#create the mean vector
mu_first <- c(1.9115,1.6876,-1.1614,5.0768,1.2716)

#generate the multivariate normal distribution
data_first <- as.data.frame(mvrnorm(n=40, #40 observation
                                    mu=mu_first, Sigma=sigma_first)) 
data_first
##       Internet      Mobile      Credit       Debit      Emoney
## 1   6.09649383  4305.12990 -352.962363  2010.60931  26960.9366
## 2   3.33776743  1218.16387 -208.421986  1024.46159  10018.9041
## 3   0.09768967 -1708.63003   15.301426    88.67557 -16910.0951
## 4  -3.34680196 -4664.09350  375.116611  -926.59700 -33222.6100
## 5   0.80538559  -753.20649  132.783341 -1124.03718  -8799.8642
## 6   2.07812685  -752.91947  -86.641849   190.94372   -441.6693
## 7   7.36782377  5250.39562 -381.278578  1198.40400  35481.4010
## 8  -3.10132575 -4262.23473  245.253433 -1859.53494 -31901.1687
## 9  -0.33051459 -2422.76810  179.210637 -1051.45517 -14195.5412
## 10  4.64256233  1969.34563 -209.117432   770.16539  18614.4849
## 11  2.19149526  -108.40658    9.914327    32.08888   -673.1266
## 12 -2.13942529 -4077.28695  307.005694 -1429.23438 -29380.8633
## 13 -2.33660596 -3394.28314  312.341489  -612.54752 -31819.3659
## 14  5.84415946  3570.62669 -173.378728  1849.56435  29683.7886
## 15 -1.91516814 -3307.05563  108.455792  -213.37917 -19512.8654
## 16  2.08229659   -97.00085 -164.968873   496.19460   4084.3904
## 17  2.68994989  1471.29239  -75.365198   -63.10560   7180.8927
## 18  6.66608020  4119.21482 -238.435937  1057.67501  31901.6251
## 19  0.39072062 -1369.72765  148.040414  1208.37161 -10399.7393
## 20  3.99096934  1972.66289  -97.637474 -1145.14755  14359.2678
## 21  3.95310094  1802.33676 -167.786843   -87.57776  12761.6088
## 22  0.44028300 -1518.86263   64.551172   136.56845  -6464.3309
## 23  1.46945639 -1228.46082   -7.602836  1398.49557  -6250.0513
## 24  4.21542063  1777.21921 -102.733118   859.47284  19481.6884
## 25 -3.92108248 -5523.93651  289.771453  -914.53700 -39896.5386
## 26 -1.66161361 -4256.79955  199.929729  -939.26050 -25898.5150
## 27 -0.09302934 -1223.29858   18.822623  -470.50327 -12584.3868
## 28  4.20117254  2412.15590 -102.295727  -157.50566  10457.9964
## 29 -3.61090145 -4921.40408  362.795177  -608.68465 -37940.0293
## 30  6.85897439  4738.89181 -430.124772  2790.90949  31579.6019
## 31  8.15735054  6034.85941 -532.024957  3015.34230  44690.8397
## 32 -4.59684435 -5910.89742  367.727154 -1033.44582 -43586.5592
## 33 -4.56362575 -6297.81182  287.397220 -2290.39865 -44124.6226
## 34 -5.05686093 -5639.01106  493.668224 -1852.92294 -43642.4062
## 35  2.54728793  -519.62374   27.913840   152.21576  -1818.1710
## 36 -4.19117659 -5233.59218  421.983946 -1503.55076 -43939.8262
## 37  3.29441876   866.24075 -127.977530  -145.89308  10874.0921
## 38  2.63964829   520.38411   11.861802   -35.51122   5154.2225
## 39  1.36005661  -593.35894   41.874328  -398.16648  -2250.1586
## 40  5.30882826  2420.94137 -255.135283  2519.60890  18399.1714
ggpairs(data_first)

#Standard Normal Transform
data_first_std <- data_first %>% 
  mutate(Internet = (Internet-mean(Internet))/sd(Internet),
         Mobile = (Mobile-mean(Mobile))/sd(Mobile),
         Credit = (Credit-mean(Credit))/sd(Credit),
         Debit = (Debit-mean(Debit))/sd(Debit),
         Emoney = (Emoney-mean(Emoney))/sd(Emoney))

#Transformation doesn't really change the pattern of the data: compare 2 graphs below
data_first %>% ggplot(aes(x=1:40,y=Internet))+geom_line()

data_first_std %>% ggplot(aes(x=1:40,y=Internet))+geom_line()

cov(data_first)
##             Internet      Mobile      Credit        Debit       Emoney
## Internet    13.97867    12625.29     -902.78     3929.779     93485.84
## Mobile   12625.29189 11604528.83  -816903.86  3535168.292  84812996.27
## Credit    -902.77999  -816903.86    63051.00  -269618.240  -6071923.33
## Debit     3929.77871  3535168.29  -269618.24  1648239.020  26288392.50
## Emoney   93485.83667 84812996.27 -6071923.33 26288392.497 632364994.81
cov(data_first_std)
##            Internet     Mobile     Credit      Debit     Emoney
## Internet  1.0000000  0.9912763 -0.9616189  0.8187005  0.9943264
## Mobile    0.9912763  1.0000000 -0.9550172  0.8083254  0.9900674
## Credit   -0.9616189 -0.9550172  1.0000000 -0.8363599 -0.9616043
## Debit     0.8187005  0.8083254 -0.8363599  1.0000000  0.8142730
## Emoney    0.9943264  0.9900674 -0.9616043  0.8142730  1.0000000
# First Stage -------------------------------------------------------------


#PCA Dimension A
Dimension_First <- princomp(data_first_std,cor=F) #Since our data is already standardized, therefore we don't need to base our PCA calculation on correlation matrix
summary(Dimension_First) #Eigen Value
## Importance of components:
##                           Comp.1    Comp.2     Comp.3      Comp.4      Comp.5
## Standard deviation     2.1317132 0.5135192 0.22726282 0.100270821 0.073447104
## Proportion of Variance 0.9321438 0.0540927 0.01059454 0.002062408 0.001106559
## Cumulative Proportion  0.9321438 0.9862365 0.99683103 0.998893441 1.000000000
Dimension_First$sdev
##    Comp.1    Comp.2    Comp.3    Comp.4    Comp.5 
## 2.1317132 0.5135192 0.2272628 0.1002708 0.0734471
Dimension_First$loadings
## 
## Loadings:
##          Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Internet  0.458  0.227  0.234  0.320  0.762
## Mobile    0.456  0.257  0.317 -0.782 -0.120
## Credit   -0.453         0.885              
## Debit     0.409 -0.904  0.124              
## Emoney    0.458  0.241  0.215  0.531 -0.636
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## SS loadings       1.0    1.0    1.0    1.0    1.0
## Proportion Var    0.2    0.2    0.2    0.2    0.2
## Cumulative Var    0.2    0.4    0.6    0.8    1.0
score_A <- (Dimension_First$scores %*% (matrix(Dimension_First$sdev^2)))/sum(Dimension_First$sdev^2)
weight_A <- (Dimension_First$loadings %*% matrix(Dimension_First$sdev^2))/sum(Dimension_First$sdev^2)


# Second Stage ------------------------------------------------------------

data_IFI <- data.frame(score_A)

# Now that we already have the sub-index score which is score A and score B
# we will proceed to perform the second stage PCA using both score
# However, we need to standardize the obtained dimension scores first, just like in the first stage

data_IFI_std <- data_IFI %>% 
  mutate(score_A = (score_A-mean(score_A))/sd(score_A))

IFI <- princomp(data_IFI_std,cor=F)

summary(IFI) #Eigen Value
## Importance of components:
##                           Comp.1
## Standard deviation     0.9874209
## Proportion of Variance 1.0000000
## Cumulative Proportion  1.0000000
IFI$sdev
##    Comp.1 
## 0.9874209
IFI$loadings
## 
## Loadings:
##         Comp.1
## score_A 1     
## 
##                Comp.1
## SS loadings         1
## Proportion Var      1
score_IFI <- (IFI$scores %*% (matrix(IFI$sdev^2)))/sum(IFI$sdev^2)
weight_IFI <- (IFI$loadings %*% matrix(IFI$sdev^2))/sum(IFI$sdev^2)

# Viz ---------------------------------------------------------------------

# Finally, now that we have obtained IFI from previous calculations,
# we will use line chart to visualize the result
# however, to make it easier to analyze the dataa, we will transform the obtained score of each dimension and IFI to its normal CDF distribution form
# so that the value will range from 0-1

data_Viz <- data.frame(dim_A = pnorm(score_A, mean=mean(score_A), sd=sd(score_A)),
                       IFI = pnorm(score_IFI, mean=mean(score_IFI), sd=sd(score_IFI)))
ggplot(data_Viz, aes(x=1:40)) + 
  geom_line(aes(y = dim_A, col = "green")) + 
  geom_line(aes(y = IFI, col = "black"))