df <- read.csv("Dry_Bean_Dataset.csv")
head(df)
##    Area Perimeter MajorAxisLength MinorAxisLength AspectRation Eccentricity
## 1 28395   610.291        208.1781        173.8887     1.197191    0.5498122
## 2 28734   638.018        200.5248        182.7344     1.097356    0.4117853
## 3 29380   624.110        212.8261        175.9311     1.209713    0.5627273
## 4 30008   645.884        210.5580        182.5165     1.153638    0.4986160
## 5 30140   620.134        201.8479        190.2793     1.060798    0.3336797
## 6 30279   634.927        212.5606        181.5102     1.171067    0.5204007
##   ConvexArea EquivDiameter    Extent  Solidity roundness Compactness
## 1      28715      190.1411 0.7639225 0.9888560 0.9580271   0.9133578
## 2      29172      191.2728 0.7839681 0.9849856 0.8870336   0.9538608
## 3      29690      193.4109 0.7781132 0.9895588 0.9478495   0.9087742
## 4      30724      195.4671 0.7826813 0.9766957 0.9039364   0.9283288
## 5      30417      195.8965 0.7730980 0.9908933 0.9848771   0.9705155
## 6      30600      196.3477 0.7756885 0.9895098 0.9438518   0.9237260
##   ShapeFactor1 ShapeFactor2 ShapeFactor3 ShapeFactor4 Class
## 1  0.007331506  0.003147289    0.8342224    0.9987239 SEKER
## 2  0.006978659  0.003563624    0.9098505    0.9984303 SEKER
## 3  0.007243912  0.003047733    0.8258706    0.9990661 SEKER
## 4  0.007016729  0.003214562    0.8617944    0.9941988 SEKER
## 5  0.006697010  0.003664972    0.9419004    0.9991661 SEKER
## 6  0.007020065  0.003152779    0.8532696    0.9992358 SEKER
dim(df)
## [1] 13611    17
str(df)
## 'data.frame':    13611 obs. of  17 variables:
##  $ Area           : int  28395 28734 29380 30008 30140 30279 30477 30519 30685 30834 ...
##  $ Perimeter      : num  610 638 624 646 620 ...
##  $ MajorAxisLength: num  208 201 213 211 202 ...
##  $ MinorAxisLength: num  174 183 176 183 190 ...
##  $ AspectRation   : num  1.2 1.1 1.21 1.15 1.06 ...
##  $ Eccentricity   : num  0.55 0.412 0.563 0.499 0.334 ...
##  $ ConvexArea     : int  28715 29172 29690 30724 30417 30600 30970 30847 31044 31120 ...
##  $ EquivDiameter  : num  190 191 193 195 196 ...
##  $ Extent         : num  0.764 0.784 0.778 0.783 0.773 ...
##  $ Solidity       : num  0.989 0.985 0.99 0.977 0.991 ...
##  $ roundness      : num  0.958 0.887 0.948 0.904 0.985 ...
##  $ Compactness    : num  0.913 0.954 0.909 0.928 0.971 ...
##  $ ShapeFactor1   : num  0.00733 0.00698 0.00724 0.00702 0.0067 ...
##  $ ShapeFactor2   : num  0.00315 0.00356 0.00305 0.00321 0.00366 ...
##  $ ShapeFactor3   : num  0.834 0.91 0.826 0.862 0.942 ...
##  $ ShapeFactor4   : num  0.999 0.998 0.999 0.994 0.999 ...
##  $ Class          : chr  "SEKER" "SEKER" "SEKER" "SEKER" ...
colSums(is.na(df))
##            Area       Perimeter MajorAxisLength MinorAxisLength    AspectRation 
##               0               0               0               0               0 
##    Eccentricity      ConvexArea   EquivDiameter          Extent        Solidity 
##               0               0               0               0               0 
##       roundness     Compactness    ShapeFactor1    ShapeFactor2    ShapeFactor3 
##               0               0               0               0               0 
##    ShapeFactor4           Class 
##               0               0
data_numeric <- df[, sapply(df, is.numeric)]
# Statistik Deskriptif
desc <- describe(data_numeric)
kable(desc[,c("mean","sd","min","max")],
      caption = "Tabel 1. Statistik Deskriptif Variabel")
Tabel 1. Statistik Deskriptif Variabel
mean sd min max
Area 5.304828e+04 2.932410e+04 2.042000e+04 2.546160e+05
Perimeter 8.552835e+02 2.142897e+02 5.247360e+02 1.985370e+03
MajorAxisLength 3.201419e+02 8.569419e+01 1.836012e+02 7.388602e+02
MinorAxisLength 2.022707e+02 4.497009e+01 1.225127e+02 4.601985e+02
AspectRation 1.583242e+00 2.466785e-01 1.024868e+00 2.430306e+00
Eccentricity 7.508949e-01 9.200180e-02 2.189513e-01 9.114230e-01
ConvexArea 5.376820e+04 2.977492e+04 2.068400e+04 2.632610e+05
EquivDiameter 2.530642e+02 5.917712e+01 1.612438e+02 5.693744e+02
Extent 7.497328e-01 4.908640e-02 5.553147e-01 8.661946e-01
Solidity 9.871428e-01 4.660400e-03 9.192462e-01 9.946775e-01
roundness 8.732818e-01 5.951990e-02 4.896183e-01 9.906854e-01
Compactness 7.998637e-01 6.171350e-02 6.405768e-01 9.873030e-01
ShapeFactor1 6.563600e-03 1.128000e-03 2.778000e-03 1.045120e-02
ShapeFactor2 1.715900e-03 5.959000e-04 5.642000e-04 3.665000e-03
ShapeFactor3 6.435902e-01 9.899620e-02 4.103386e-01 9.747672e-01
ShapeFactor4 9.950633e-01 4.366500e-03 9.476874e-01 9.997325e-01
# Matriks Korelasi
cor_matrix <- cor(data_numeric)

corrplot(cor_matrix,
         method="color",
         type="upper",
         tl.cex=0.6)

KMO(data_numeric)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_numeric)
## Overall MSA =  0.67
## MSA for each item = 
##            Area       Perimeter MajorAxisLength MinorAxisLength    AspectRation 
##            0.66            0.82            0.68            0.58            0.60 
##    Eccentricity      ConvexArea   EquivDiameter          Extent        Solidity 
##            0.70            0.67            0.63            1.00            0.43 
##       roundness     Compactness    ShapeFactor1    ShapeFactor2    ShapeFactor3 
##            0.74            0.66            0.78            0.85            0.68 
##    ShapeFactor4 
##            0.40
cortest.bartlett(data_numeric)
## R was not square, finding R from data
## $chisq
## [1] 1046464
## 
## $p.value
## [1] 0
## 
## $df
## [1] 120
data_scaled <- scale(data_numeric)
pca <- prcomp(data_scaled, center = TRUE, scale = TRUE)
summary(pca)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5    PC6     PC7
## Standard deviation     2.9790 2.0564 1.13183 0.90457 0.66203 0.4289 0.33410
## Proportion of Variance 0.5547 0.2643 0.08007 0.05114 0.02739 0.0115 0.00698
## Cumulative Proportion  0.5547 0.8190 0.89904 0.95018 0.97757 0.9891 0.99605
##                            PC8     PC9    PC10    PC11    PC12    PC13     PC14
## Standard deviation     0.22806 0.09089 0.03813 0.03247 0.01715 0.01220 0.003164
## Proportion of Variance 0.00325 0.00052 0.00009 0.00007 0.00002 0.00001 0.000000
## Cumulative Proportion  0.99930 0.99981 0.99991 0.99997 0.99999 1.00000 1.000000
##                            PC15     PC16
## Standard deviation     0.001465 0.001336
## Proportion of Variance 0.000000 0.000000
## Cumulative Proportion  1.000000 1.000000
fviz_eig(pca)
## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.

kable(
  round(pca$rotation[,1:3],3),
  caption = "Tabel 2. Loading Tiga Komponen Utama"
)
Tabel 2. Loading Tiga Komponen Utama
PC1 PC2 PC3
Area 0.282 0.246 -0.061
Perimeter 0.311 0.179 -0.019
MajorAxisLength 0.326 0.101 -0.085
MinorAxisLength 0.236 0.343 0.008
AspectRation 0.229 -0.331 -0.169
Eccentricity 0.232 -0.319 -0.163
ConvexArea 0.283 0.245 -0.054
EquivDiameter 0.297 0.223 -0.050
Extent -0.060 0.221 -0.085
Solidity -0.143 0.103 -0.739
roundness -0.248 0.215 -0.163
Compactness -0.238 0.329 0.150
ShapeFactor1 -0.221 -0.333 -0.033
ShapeFactor2 -0.315 0.129 0.120
ShapeFactor3 -0.239 0.328 0.150
ShapeFactor4 -0.198 0.100 -0.537
fa.parallel(data_scaled, fa = "fa", fm = "pa")

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA
fa_result <- fa(data_scaled, 
                nfactors = 3, 
                rotate = "varimax",
                fm = "pa")
## maximum iteration exceeded
## Warning in log(det(m.inv.r)): NaNs produced
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
# 3. Melihat ringkasan hasil
print(fa_result)
## Factor Analysis using method =  pa
## Call: fa(r = data_scaled, nfactors = 3, rotate = "varimax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                   PA1   PA2   PA3   h2      u2 com
## Area             0.97 -0.14 -0.06 0.96  0.0372 1.0
## Perimeter        0.95 -0.29 -0.13 1.00  0.0011 1.2
## MajorAxisLength  0.88 -0.46 -0.09 1.00  0.0024 1.5
## MinorAxisLength  0.99  0.11 -0.07 1.00 -0.0044 1.0
## AspectRation     0.10 -0.97 -0.05 0.96  0.0359 1.0
## Eccentricity     0.12 -0.95 -0.07 0.92  0.0752 1.0
## ConvexArea       0.97 -0.14 -0.07 0.96  0.0368 1.1
## EquivDiameter    0.98 -0.20 -0.08 1.00 -0.0049 1.1
## Extent           0.11  0.37  0.11 0.16  0.8376 1.4
## Solidity        -0.09  0.20  1.23 1.56 -0.5567 1.1
## roundness       -0.27  0.72  0.35 0.71  0.2889 1.8
## Compactness     -0.12  0.99  0.08 1.01 -0.0059 1.0
## ShapeFactor1    -0.92 -0.12  0.08 0.87  0.1296 1.0
## ShapeFactor2    -0.56  0.80  0.11 0.97  0.0348 1.8
## ShapeFactor3    -0.13  0.99  0.08 1.00 -0.0034 1.0
## ShapeFactor4    -0.28  0.40  0.47 0.46  0.5389 2.6
## 
##                        PA1  PA2  PA3
## SS loadings           6.89 5.72 1.95
## Proportion Var        0.43 0.36 0.12
## Cumulative Var        0.43 0.79 0.91
## Proportion Explained  0.47 0.39 0.13
## Cumulative Proportion 0.47 0.87 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 3 factors are sufficient.
## 
## df null model =  120  with the objective function =  76.92 with Chi Square =  1046464
## df of  the model are 75  and the objective function was  NaN 
## 
## The root mean square of the residuals (RMSR) is  0.01 
## The df corrected root mean square of the residuals is  0.02 
## 
## The harmonic n.obs is  13611 with the empirical chi square  362.94  with prob <  2.4e-39 
## The total n.obs was  13611  with Likelihood Chi Square =  NaN  with prob <  NaN 
## 
## Tucker Lewis Index of factoring reliability =  NaN
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                   PA1 PA2 PA3
## Correlation of (regression) scores with factors     1   1   1
## Multiple R square of scores with factors            1   1   1
## Minimum correlation of possible factor scores       1   1   1
# 4. Membuat tabel loading
load <- as.data.frame(unclass(fa_result$loadings))

load_round <- round(load, 3)

load_cut <- load_round
load_cut[abs(load_cut) < 0.5] <- ""

kable(load_cut,
      caption = "Tabel 3. Factor Loading (>0,5) Setelah Rotasi Varimax")
Tabel 3. Factor Loading (>0,5) Setelah Rotasi Varimax
PA1 PA2 PA3
Area 0.969
Perimeter 0.948
MajorAxisLength 0.884
MinorAxisLength 0.993
AspectRation -0.975
Eccentricity -0.951
ConvexArea 0.969
EquivDiameter 0.979
Extent
Solidity 1.227
roundness 0.72
Compactness 0.992
ShapeFactor1 -0.922
ShapeFactor2 -0.556 0.803
ShapeFactor3 0.991
ShapeFactor4