1 Load the required packages

library(tidyverse)
library(factoextra)
library(FactoMineR)
library(animation)
library(lubridate)

2 Read Data

# Read the placement data, converting text columns to factors so that the
# numeric / categorical split further down can rely on is.factor().
# NOTE: the object name `class` shadows base::class for variable lookup; it is
# kept because later statements refer to it by this name.
class <- read.csv("Placement_Data_Full_Class.csv", stringsAsFactors = TRUE)
# Preview the first four rows.
head(class, 4)
# Missing-value report: one row per column that contains at least one NA,
# with the NA count and the share of observations that are missing.
class %>%
  is.na() %>%
  colSums() %>%
  as.data.frame() %>%
  rownames_to_column(var = "var") %>%
  rename(total = 2) %>%
  filter(total != 0) %>%
  arrange(desc(total)) %>%
  # Divide by the number of observations in the dataset. `nrow(.)` would be
  # the number of rows of this summary table (variables with NAs) instead.
  mutate(percent = total / nrow(class))
# Working copy of the data without the `salary` column.
class_clean <- select(class, -salary)
# Numeric columns only: these are the inputs to the PCA below.
class_num <- class_clean %>%
  select(where(is.numeric))

# Centred and unit-variance version of the numeric columns (a matrix).
class_scale <- class_num %>%
  scale()

# Categorical (factor) columns only.
class_fac <- class_clean %>%
  select(where(is.factor))
# Plot the component variances of a PCA fitted on the pre-scaled matrix.
plot(prcomp(x = class_scale))

# Fit the PCA on the raw numeric columns and let prcomp() do the scaling.
# The argument is spelled `scale.` (with the trailing dot); `scale = ` only
# worked through partial argument matching.
# NOTE(review): sl_no is among the inputs (it appears in the rotation matrix
# printed below). If it is just a row identifier it probably should not be
# part of the PCA -- confirm before relying on PC2.
pca_class <- prcomp(x = class_num, scale. = TRUE)
pca_class
## Standard deviations (1, .., p=6):
## [1] 1.5752131 1.0288493 0.8986343 0.8073614 0.7486821 0.6635300
## 
## Rotation (n x k) = (6 x 6):
##                  PC1         PC2         PC3         PC4         PC5
## sl_no     0.06414311  0.88604130  0.37216416 -0.26605632  0.03020654
## ssc_p    -0.50981647 -0.07478190  0.08656339 -0.30386655  0.17049563
## hsc_p    -0.47475539 -0.08890939  0.03207312 -0.41378266 -0.69573721
## degree_p -0.48926990 -0.10083541  0.17341518 -0.08730727  0.65881029
## etest_p  -0.30251357  0.41235497 -0.84825837  0.12267666  0.05523152
## mba_p    -0.42388754  0.14570245  0.32149160  0.80186716 -0.22110411
##                  PC6
## sl_no     0.02473706
## ssc_p    -0.77820176
## hsc_p     0.33227610
## degree_p  0.52794015
## etest_p   0.02840008
## mba_p    -0.06209128
# Standard deviation of each principal component.
pca_class[["sdev"]]
## [1] 1.5752131 1.0288493 0.8986343 0.8073614 0.7486821 0.6635300
# Loadings: one row per input variable, one column per component.
pca_class[["rotation"]]
##                  PC1         PC2         PC3         PC4         PC5
## sl_no     0.06414311  0.88604130  0.37216416 -0.26605632  0.03020654
## ssc_p    -0.50981647 -0.07478190  0.08656339 -0.30386655  0.17049563
## hsc_p    -0.47475539 -0.08890939  0.03207312 -0.41378266 -0.69573721
## degree_p -0.48926990 -0.10083541  0.17341518 -0.08730727  0.65881029
## etest_p  -0.30251357  0.41235497 -0.84825837  0.12267666  0.05523152
## mba_p    -0.42388754  0.14570245  0.32149160  0.80186716 -0.22110411
##                  PC6
## sl_no     0.02473706
## ssc_p    -0.77820176
## hsc_p     0.33227610
## degree_p  0.52794015
## etest_p   0.02840008
## mba_p    -0.06209128
# First rows of the component scores stored in the fitted object.
head(pca_class[["x"]])
##              PC1       PC2        PC3        PC4        PC5         PC6
## [1,]  0.02826401 -2.226473  0.1337377 -1.0073058 -2.3202254  0.13131177
## [2,] -2.55581626 -1.295722 -0.9403594  0.2116305  0.2748488  0.24449926
## [3,]  0.34451068 -1.482512 -1.1295839 -0.1102483 -0.2240694  0.05845093
## [4,]  2.35086207 -1.349964 -0.8605552  1.0288581 -0.5173628 -0.67965747
## [5,] -1.82475219 -1.151147 -2.2353338 -1.1402480  0.7573914 -0.52666861
## [6,]  2.30298810 -2.043326 -0.2334751 -0.2297483  1.2253901  0.48004185
# Variance explained per component and cumulatively.
summary(pca_class)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6
## Standard deviation     1.5752 1.0288 0.8986 0.8074 0.74868 0.66353
## Proportion of Variance 0.4135 0.1764 0.1346 0.1086 0.09342 0.07338
## Cumulative Proportion  0.4135 0.5900 0.7246 0.8332 0.92662 1.00000
# Keep the scores of PC1-PC4: per the summary above, four components are the
# first to exceed 80% cumulative variance.
class_80_percent <- pca_class[["x"]][, 1:4] %>%
  as.data.frame()
head(x = class_80_percent)
# Biplot of observations and variable loadings, with smaller labels.
biplot(x = pca_class, cex = 0.6)

# Bar chart of each variable's contribution to the first component.
fviz_contrib(X = pca_class, choice = "var", axes = 1)

# Column positions of the categorical (factor) variables in class_clean.
# Derived from the data rather than hard-coded as c(2, 4, 6, 7, 9, 10, 12, 14)
# so the indices stay correct if columns are added, dropped or reordered.
qualivar <- unname(which(vapply(class_clean, is.factor, logical(1))))

# PCA with the categorical columns as supplementary variables: they are shown
# on the plots but do not take part in building the components.
PCA(
  X = class_clean,
  scale.unit = TRUE,
  quali.sup = qualivar,
  ncp = 6,
  graph = TRUE
)

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 215 individuals, described by 14 variables
## *The results are available in the following objects:
## 
##    name                description                                          
## 1  "$eig"              "eigenvalues"                                        
## 2  "$var"              "results for the variables"                          
## 3  "$var$coord"        "coord. for the variables"                           
## 4  "$var$cor"          "correlations variables - dimensions"                
## 5  "$var$cos2"         "cos2 for the variables"                             
## 6  "$var$contrib"      "contributions of the variables"                     
## 7  "$ind"              "results for the individuals"                        
## 8  "$ind$coord"        "coord. for the individuals"                         
## 9  "$ind$cos2"         "cos2 for the individuals"                           
## 10 "$ind$contrib"      "contributions of the individuals"                   
## 11 "$quali.sup"        "results for the supplementary categorical variables"
## 12 "$quali.sup$coord"  "coord. for the supplementary categories"            
## 13 "$quali.sup$v.test" "v-test of the supplementary categories"             
## 14 "$call"             "summary statistics"                                 
## 15 "$call$centre"      "mean of the variables"                              
## 16 "$call$ecart.type"  "standard error of the variables"                    
## 17 "$call$row.w"       "weights for the individuals"                        
## 18 "$call$col.w"       "weights for the variables"
# Same PCA as above, stored for later use and without drawing the plots.
class_pca <- PCA(
  X = class_clean,
  scale.unit = TRUE,
  quali.sup = qualivar,
  ncp = 6,
  graph = FALSE
)
# Coordinates of the first individuals on the retained dimensions.
head(class_pca$ind$coord)
##         Dim.1     Dim.2      Dim.3      Dim.4      Dim.5       Dim.6
## 1 -0.02832997 -2.231669  0.1340498  1.0096566  2.3256402  0.13161821
## 2  2.56178083 -1.298746 -0.9425539 -0.2121244 -0.2754902  0.24506986
## 3 -0.34531467 -1.485972 -1.1322200  0.1105056  0.2245923  0.05858734
## 4 -2.35634834 -1.353115 -0.8625635 -1.0312592  0.5185702 -0.68124361
## 5  1.82901066 -1.153834 -2.2405505  1.1429090 -0.7591589 -0.52789771
## 6 -2.30836265 -2.048094 -0.2340200  0.2302845 -1.2282498  0.48116214
# Individuals map, coloured by degree_t, with the supplementary categories
# hidden; `select` restricts which individuals are shown.
plot(
  class_pca,
  choix = "ind",
  select = "contrib5",
  habillage = "degree_t",
  invisible = "quali"
)

# Variables map.
plot(class_pca, choix = "var")

# Describe each dimension by the variables associated with it.
dim_class <- dimdesc(class_pca)
# Quantitative variables that contribute to PC1.
as.data.frame(dim_class[["Dim.1"]][["quanti"]])
# Quantitative variables that contribute to PC2.
as.data.frame(dim_class[["Dim.2"]][["quanti"]])
# PCA summary: eigenvalue and share of variance for each component.
class_pca[["eig"]]
##        eigenvalue percentage of variance cumulative percentage of variance
## comp 1  2.4812963              41.354938                          41.35494
## comp 2  1.0585308              17.642180                          58.99712
## comp 3  0.8075435              13.459059                          72.45618
## comp 4  0.6518325              10.863874                          83.32005
## comp 5  0.5605249               9.342081                          92.66213
## comp 6  0.4402721               7.337868                         100.00000
# Keep the PCA coordinates for as many components as needed (the first four).
class_keep <- class_pca$ind$coord[, 1:4] %>%
  as.data.frame()
head(x = class_keep)
# Reconstruct the data using PC1-PC4 only.
class_reconst <- reconst(res = class_pca, ncp = 4)
head(x = class_reconst)
##      sl_no    ssc_p    hsc_p degree_p  etest_p    mba_p
## 1 5.157929 72.38952 72.93303 68.73835 56.65180 55.85497
## 2 1.107270 80.88272 79.52852 75.19765 86.20628 66.72305
## 3 3.331106 65.90612 66.08950 64.85921 75.14226 57.53217
## 4 6.018099 51.22842 50.53848 57.14864 66.63561 58.51654
## 5 4.387242 79.96428 81.24945 71.67425 96.44322 56.28611
## 6 2.958610 56.78265 57.35244 59.44434 53.92049 53.33436
# Second, independent read of the same file, leaving stringsAsFactors at its
# default instead of forcing factors.
class1 <- read.csv(file = "Placement_Data_Full_Class.csv")