# Read Data
# Load the placement dataset, converting text columns to factors.
# `TRUE` is spelled out because the shorthand `T` is an ordinary variable that
# can be reassigned.
class <- read.csv("Placement_Data_Full_Class.csv", stringsAsFactors = TRUE)
# Preview the first four rows.
head(class, 4)
# Summarise missing values: one row per column that has at least one NA, with
# the NA count (`total`) and the fraction of observations missing (`percent`).
class %>%
  is.na() %>%
  colSums() %>%
  as.data.frame() %>%
  rownames_to_column(var = "var") %>%
  # the count column produced by as.data.frame() is the second column
  rename(total = 2) %>%
  filter(total != 0) %>%
  arrange(desc(total)) %>%
  # Divide by the number of observations in the dataset. Inside this pipeline
  # `.` is the filtered summary table, so `nrow(.)` would count the variables
  # that have NAs rather than the rows of `class`.
  mutate(percent = total / nrow(class))
# Drop the salary column before any further processing.
class_clean <- select(class, -salary)

# Numeric columns only.
# NOTE(review): sl_no remains among the numeric columns (it shows up in the
# PCA loadings printed further down) -- confirm it is meant to be analysed.
class_num <- select_if(class_clean, is.numeric)

# Centred and scaled copy of the numeric columns (a matrix, as returned by
# scale()).
class_scale <- scale(class_num)

# Factor columns only.
class_fac <- select_if(class_clean, is.factor)
# Plot a PCA fitted on the already-scaled numeric columns.
plot(prcomp(x = class_scale))

# Fit the PCA on the raw numeric columns and let prcomp() standardise them.
# The argument is named `scale.`; the previous `scale = T` only worked through
# partial argument matching and the reassignable `T` shorthand.
pca_class <- prcomp(x = class_num, scale. = TRUE)
pca_class
## Standard deviations (1, .., p=6):
## [1] 1.5752131 1.0288493 0.8986343 0.8073614 0.7486821 0.6635300
##
## Rotation (n x k) = (6 x 6):
## PC1 PC2 PC3 PC4 PC5
## sl_no 0.06414311 0.88604130 0.37216416 -0.26605632 0.03020654
## ssc_p -0.50981647 -0.07478190 0.08656339 -0.30386655 0.17049563
## hsc_p -0.47475539 -0.08890939 0.03207312 -0.41378266 -0.69573721
## degree_p -0.48926990 -0.10083541 0.17341518 -0.08730727 0.65881029
## etest_p -0.30251357 0.41235497 -0.84825837 0.12267666 0.05523152
## mba_p -0.42388754 0.14570245 0.32149160 0.80186716 -0.22110411
## PC6
## sl_no 0.02473706
## ssc_p -0.77820176
## hsc_p 0.33227610
## degree_p 0.52794015
## etest_p 0.02840008
## mba_p -0.06209128
## [1] 1.5752131 1.0288493 0.8986343 0.8073614 0.7486821 0.6635300
## PC1 PC2 PC3 PC4 PC5
## sl_no 0.06414311 0.88604130 0.37216416 -0.26605632 0.03020654
## ssc_p -0.50981647 -0.07478190 0.08656339 -0.30386655 0.17049563
## hsc_p -0.47475539 -0.08890939 0.03207312 -0.41378266 -0.69573721
## degree_p -0.48926990 -0.10083541 0.17341518 -0.08730727 0.65881029
## etest_p -0.30251357 0.41235497 -0.84825837 0.12267666 0.05523152
## mba_p -0.42388754 0.14570245 0.32149160 0.80186716 -0.22110411
## PC6
## sl_no 0.02473706
## ssc_p -0.77820176
## hsc_p 0.33227610
## degree_p 0.52794015
## etest_p 0.02840008
## mba_p -0.06209128
## PC1 PC2 PC3 PC4 PC5 PC6
## [1,] 0.02826401 -2.226473 0.1337377 -1.0073058 -2.3202254 0.13131177
## [2,] -2.55581626 -1.295722 -0.9403594 0.2116305 0.2748488 0.24449926
## [3,] 0.34451068 -1.482512 -1.1295839 -0.1102483 -0.2240694 0.05845093
## [4,] 2.35086207 -1.349964 -0.8605552 1.0288581 -0.5173628 -0.67965747
## [5,] -1.82475219 -1.151147 -2.2353338 -1.1402480 0.7573914 -0.52666861
## [6,] 2.30298810 -2.043326 -0.2334751 -0.2297483 1.2253901 0.48004185
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 1.5752 1.0288 0.8986 0.8074 0.74868 0.66353
## Proportion of Variance 0.4135 0.1764 0.1346 0.1086 0.09342 0.07338
## Cumulative Proportion 0.4135 0.5900 0.7246 0.8332 0.92662 1.00000
# Keep the scores of the first four principal components as a data frame
# (the summary printed above shows a cumulative proportion of 0.8332 at PC4).
class_80_percent <- as.data.frame(pca_class[["x"]][, seq_len(4)])
head(class_80_percent)

# Biplot of the fitted PCA with smaller text.
biplot(pca_class, cex = 0.6)

# Contribution of each variable to the first component.
fviz_contrib(pca_class, choice = "var", axes = 1)

# Column positions of the categorical (non-numeric) variables in class_clean.
# Derived from the data instead of hard-coded, so the index stays correct if
# columns are added, removed or reordered. For the current data this yields
# the same positions as before: 2, 4, 6, 7, 9, 10, 12, 14.
qualivar <- unname(which(!vapply(class_clean, is.numeric, logical(1))))

# PCA on the standardised numeric variables; the categorical variables are
# passed as supplementary. Draws the default graphs and prints the result.
PCA(
  X = class_clean,
  scale.unit = TRUE,
  quali.sup = qualivar,
  ncp = 6,
  graph = TRUE
)


## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 215 individuals, described by 14 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$quali.sup" "results for the supplementary categorical variables"
## 12 "$quali.sup$coord" "coord. for the supplementary categories"
## 13 "$quali.sup$v.test" "v-test of the supplementary categories"
## 14 "$call" "summary statistics"
## 15 "$call$centre" "mean of the variables"
## 16 "$call$ecart.type" "standard error of the variables"
## 17 "$call$row.w" "weights for the individuals"
## 18 "$call$col.w" "weights for the variables"
# Same PCA as above, stored for later use and without drawing the graphs.
# TRUE/FALSE are spelled out because `T`/`F` are reassignable variables.
class_pca <- PCA(
  X = class_clean,
  scale.unit = TRUE,
  quali.sup = qualivar,
  ncp = 6,
  graph = FALSE
)
# Coordinates of the first individuals on the retained dimensions.
head(class_pca$ind$coord)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6
## 1 -0.02832997 -2.231669 0.1340498 1.0096566 2.3256402 0.13161821
## 2 2.56178083 -1.298746 -0.9425539 -0.2121244 -0.2754902 0.24506986
## 3 -0.34531467 -1.485972 -1.1322200 0.1105056 0.2245923 0.05858734
## 4 -2.35634834 -1.353115 -0.8625635 -1.0312592 0.5185702 -0.68124361
## 5 1.82901066 -1.153834 -2.2405505 1.1429090 -0.7591589 -0.52789771
## 6 -2.30836265 -2.048094 -0.2340200 0.2302845 -1.2282498 0.48116214
# Individuals map, coloured by degree_t, with the supplementary categories
# hidden. NOTE(review): select = "contrib5" presumably restricts the
# highlighting to the five highest-contributing individuals -- confirm against
# the FactoMineR documentation.
plot.PCA(
  class_pca,
  choix = "ind",
  select = "contrib5",
  habillage = "degree_t",
  invisible = "quali"
)

# Variables plot.
plot.PCA(class_pca, choix = "var")

# Description of the dimensions.
dim_class <- dimdesc(class_pca)
# variables contributing to PC1
as.data.frame(dim_class[["Dim.1"]][["quanti"]])
# variables contributing to PC2
as.data.frame(dim_class[["Dim.2"]][["quanti"]])
# PCA summary
class_pca[["eig"]]
## eigenvalue percentage of variance cumulative percentage of variance
## comp 1 2.4812963 41.354938 41.35494
## comp 2 1.0585308 17.642180 58.99712
## comp 3 0.8075435 13.459059 72.45618
## comp 4 0.6518325 10.863874 83.32005
## comp 5 0.5605249 9.342081 92.66213
## comp 6 0.4402721 7.337868 100.00000
# keep the PCA scores for as many PCs as are needed:
class_keep <- as.data.frame(class_pca[["ind"]][["coord"]][, 1:4])
head(class_keep)
# reconstruct the data using PC1 - PC4
class_reconst <- reconst(res = class_pca, ncp = 4)
head(class_reconst)
## sl_no ssc_p hsc_p degree_p etest_p mba_p
## 1 5.157929 72.38952 72.93303 68.73835 56.65180 55.85497
## 2 1.107270 80.88272 79.52852 75.19765 86.20628 66.72305
## 3 3.331106 65.90612 66.08950 64.85921 75.14226 57.53217
## 4 6.018099 51.22842 50.53848 57.14864 66.63561 58.51654
## 5 4.387242 79.96428 81.24945 71.67425 96.44322 56.28611
## 6 2.958610 56.78265 57.35244 59.44434 53.92049 53.33436
# Read the placement file again into a separate object, this time with
# read.csv()'s default handling of text columns.
class1 <- read.csv(file = "Placement_Data_Full_Class.csv")