df <- read.csv("Dry_Bean_Dataset.csv")
head(df)
## Area Perimeter MajorAxisLength MinorAxisLength AspectRation Eccentricity
## 1 28395 610.291 208.1781 173.8887 1.197191 0.5498122
## 2 28734 638.018 200.5248 182.7344 1.097356 0.4117853
## 3 29380 624.110 212.8261 175.9311 1.209713 0.5627273
## 4 30008 645.884 210.5580 182.5165 1.153638 0.4986160
## 5 30140 620.134 201.8479 190.2793 1.060798 0.3336797
## 6 30279 634.927 212.5606 181.5102 1.171067 0.5204007
## ConvexArea EquivDiameter Extent Solidity roundness Compactness
## 1 28715 190.1411 0.7639225 0.9888560 0.9580271 0.9133578
## 2 29172 191.2728 0.7839681 0.9849856 0.8870336 0.9538608
## 3 29690 193.4109 0.7781132 0.9895588 0.9478495 0.9087742
## 4 30724 195.4671 0.7826813 0.9766957 0.9039364 0.9283288
## 5 30417 195.8965 0.7730980 0.9908933 0.9848771 0.9705155
## 6 30600 196.3477 0.7756885 0.9895098 0.9438518 0.9237260
## ShapeFactor1 ShapeFactor2 ShapeFactor3 ShapeFactor4 Class
## 1 0.007331506 0.003147289 0.8342224 0.9987239 SEKER
## 2 0.006978659 0.003563624 0.9098505 0.9984303 SEKER
## 3 0.007243912 0.003047733 0.8258706 0.9990661 SEKER
## 4 0.007016729 0.003214562 0.8617944 0.9941988 SEKER
## 5 0.006697010 0.003664972 0.9419004 0.9991661 SEKER
## 6 0.007020065 0.003152779 0.8532696 0.9992358 SEKER
dim(df)
## [1] 13611 17
str(df)
## 'data.frame': 13611 obs. of 17 variables:
## $ Area : int 28395 28734 29380 30008 30140 30279 30477 30519 30685 30834 ...
## $ Perimeter : num 610 638 624 646 620 ...
## $ MajorAxisLength: num 208 201 213 211 202 ...
## $ MinorAxisLength: num 174 183 176 183 190 ...
## $ AspectRation : num 1.2 1.1 1.21 1.15 1.06 ...
## $ Eccentricity : num 0.55 0.412 0.563 0.499 0.334 ...
## $ ConvexArea : int 28715 29172 29690 30724 30417 30600 30970 30847 31044 31120 ...
## $ EquivDiameter : num 190 191 193 195 196 ...
## $ Extent : num 0.764 0.784 0.778 0.783 0.773 ...
## $ Solidity : num 0.989 0.985 0.99 0.977 0.991 ...
## $ roundness : num 0.958 0.887 0.948 0.904 0.985 ...
## $ Compactness : num 0.913 0.954 0.909 0.928 0.971 ...
## $ ShapeFactor1 : num 0.00733 0.00698 0.00724 0.00702 0.0067 ...
## $ ShapeFactor2 : num 0.00315 0.00356 0.00305 0.00321 0.00366 ...
## $ ShapeFactor3 : num 0.834 0.91 0.826 0.862 0.942 ...
## $ ShapeFactor4 : num 0.999 0.998 0.999 0.994 0.999 ...
## $ Class : chr "SEKER" "SEKER" "SEKER" "SEKER" ...
colSums(is.na(df))
## Area Perimeter MajorAxisLength MinorAxisLength AspectRation
## 0 0 0 0 0
## Eccentricity ConvexArea EquivDiameter Extent Solidity
## 0 0 0 0 0
## roundness Compactness ShapeFactor1 ShapeFactor2 ShapeFactor3
## 0 0 0 0 0
## ShapeFactor4 Class
## 0 0
data_numeric <- df[, sapply(df, is.numeric)]
# Statistik Deskriptif
desc <- describe(data_numeric)
kable(desc[,c("mean","sd","min","max")],
caption = "Tabel 1. Statistik Deskriptif Variabel")
Tabel 1. Statistik Deskriptif Variabel
| Area |
5.304828e+04 |
2.932410e+04 |
2.042000e+04 |
2.546160e+05 |
| Perimeter |
8.552835e+02 |
2.142897e+02 |
5.247360e+02 |
1.985370e+03 |
| MajorAxisLength |
3.201419e+02 |
8.569419e+01 |
1.836012e+02 |
7.388602e+02 |
| MinorAxisLength |
2.022707e+02 |
4.497009e+01 |
1.225127e+02 |
4.601985e+02 |
| AspectRation |
1.583242e+00 |
2.466785e-01 |
1.024868e+00 |
2.430306e+00 |
| Eccentricity |
7.508949e-01 |
9.200180e-02 |
2.189513e-01 |
9.114230e-01 |
| ConvexArea |
5.376820e+04 |
2.977492e+04 |
2.068400e+04 |
2.632610e+05 |
| EquivDiameter |
2.530642e+02 |
5.917712e+01 |
1.612438e+02 |
5.693744e+02 |
| Extent |
7.497328e-01 |
4.908640e-02 |
5.553147e-01 |
8.661946e-01 |
| Solidity |
9.871428e-01 |
4.660400e-03 |
9.192462e-01 |
9.946775e-01 |
| roundness |
8.732818e-01 |
5.951990e-02 |
4.896183e-01 |
9.906854e-01 |
| Compactness |
7.998637e-01 |
6.171350e-02 |
6.405768e-01 |
9.873030e-01 |
| ShapeFactor1 |
6.563600e-03 |
1.128000e-03 |
2.778000e-03 |
1.045120e-02 |
| ShapeFactor2 |
1.715900e-03 |
5.959000e-04 |
5.642000e-04 |
3.665000e-03 |
| ShapeFactor3 |
6.435902e-01 |
9.899620e-02 |
4.103386e-01 |
9.747672e-01 |
| ShapeFactor4 |
9.950633e-01 |
4.366500e-03 |
9.476874e-01 |
9.997325e-01 |
# Matriks Korelasi
cor_matrix <- cor(data_numeric)
corrplot(cor_matrix,
method="color",
type="upper",
tl.cex=0.6)

KMO(data_numeric)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_numeric)
## Overall MSA = 0.67
## MSA for each item =
## Area Perimeter MajorAxisLength MinorAxisLength AspectRation
## 0.66 0.82 0.68 0.58 0.60
## Eccentricity ConvexArea EquivDiameter Extent Solidity
## 0.70 0.67 0.63 1.00 0.43
## roundness Compactness ShapeFactor1 ShapeFactor2 ShapeFactor3
## 0.74 0.66 0.78 0.85 0.68
## ShapeFactor4
## 0.40
cortest.bartlett(data_numeric)
## R was not square, finding R from data
## $chisq
## [1] 1046464
##
## $p.value
## [1] 0
##
## $df
## [1] 120
data_scaled <- scale(data_numeric)
pca <- prcomp(data_scaled, center = TRUE, scale = TRUE)
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.9790 2.0564 1.13183 0.90457 0.66203 0.4289 0.33410
## Proportion of Variance 0.5547 0.2643 0.08007 0.05114 0.02739 0.0115 0.00698
## Cumulative Proportion 0.5547 0.8190 0.89904 0.95018 0.97757 0.9891 0.99605
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.22806 0.09089 0.03813 0.03247 0.01715 0.01220 0.003164
## Proportion of Variance 0.00325 0.00052 0.00009 0.00007 0.00002 0.00001 0.000000
## Cumulative Proportion 0.99930 0.99981 0.99991 0.99997 0.99999 1.00000 1.000000
## PC15 PC16
## Standard deviation 0.001465 0.001336
## Proportion of Variance 0.000000 0.000000
## Cumulative Proportion 1.000000 1.000000
fviz_eig(pca)
## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.

kable(
round(pca$rotation[,1:3],3),
caption = "Tabel 2. Loading Tiga Komponen Utama"
)
Tabel 2. Loading Tiga Komponen Utama
| Area |
0.282 |
0.246 |
-0.061 |
| Perimeter |
0.311 |
0.179 |
-0.019 |
| MajorAxisLength |
0.326 |
0.101 |
-0.085 |
| MinorAxisLength |
0.236 |
0.343 |
0.008 |
| AspectRation |
0.229 |
-0.331 |
-0.169 |
| Eccentricity |
0.232 |
-0.319 |
-0.163 |
| ConvexArea |
0.283 |
0.245 |
-0.054 |
| EquivDiameter |
0.297 |
0.223 |
-0.050 |
| Extent |
-0.060 |
0.221 |
-0.085 |
| Solidity |
-0.143 |
0.103 |
-0.739 |
| roundness |
-0.248 |
0.215 |
-0.163 |
| Compactness |
-0.238 |
0.329 |
0.150 |
| ShapeFactor1 |
-0.221 |
-0.333 |
-0.033 |
| ShapeFactor2 |
-0.315 |
0.129 |
0.120 |
| ShapeFactor3 |
-0.239 |
0.328 |
0.150 |
| ShapeFactor4 |
-0.198 |
0.100 |
-0.537 |
fa.parallel(data_scaled, fa = "fa", fm = "pa")

## Parallel analysis suggests that the number of factors = 3 and the number of components = NA
fa_result <- fa(data_scaled,
nfactors = 3,
rotate = "varimax",
fm = "pa")
## maximum iteration exceeded
## Warning in log(det(m.inv.r)): NaNs produced
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
# 3. Melihat ringkasan hasil
print(fa_result)
## Factor Analysis using method = pa
## Call: fa(r = data_scaled, nfactors = 3, rotate = "varimax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 PA2 PA3 h2 u2 com
## Area 0.97 -0.14 -0.06 0.96 0.0372 1.0
## Perimeter 0.95 -0.29 -0.13 1.00 0.0011 1.2
## MajorAxisLength 0.88 -0.46 -0.09 1.00 0.0024 1.5
## MinorAxisLength 0.99 0.11 -0.07 1.00 -0.0044 1.0
## AspectRation 0.10 -0.97 -0.05 0.96 0.0359 1.0
## Eccentricity 0.12 -0.95 -0.07 0.92 0.0752 1.0
## ConvexArea 0.97 -0.14 -0.07 0.96 0.0368 1.1
## EquivDiameter 0.98 -0.20 -0.08 1.00 -0.0049 1.1
## Extent 0.11 0.37 0.11 0.16 0.8376 1.4
## Solidity -0.09 0.20 1.23 1.56 -0.5567 1.1
## roundness -0.27 0.72 0.35 0.71 0.2889 1.8
## Compactness -0.12 0.99 0.08 1.01 -0.0059 1.0
## ShapeFactor1 -0.92 -0.12 0.08 0.87 0.1296 1.0
## ShapeFactor2 -0.56 0.80 0.11 0.97 0.0348 1.8
## ShapeFactor3 -0.13 0.99 0.08 1.00 -0.0034 1.0
## ShapeFactor4 -0.28 0.40 0.47 0.46 0.5389 2.6
##
## PA1 PA2 PA3
## SS loadings 6.89 5.72 1.95
## Proportion Var 0.43 0.36 0.12
## Cumulative Var 0.43 0.79 0.91
## Proportion Explained 0.47 0.39 0.13
## Cumulative Proportion 0.47 0.87 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 120 with the objective function = 76.92 with Chi Square = 1046464
## df of the model are 75 and the objective function was NaN
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.02
##
## The harmonic n.obs is 13611 with the empirical chi square 362.94 with prob < 2.4e-39
## The total n.obs was 13611 with Likelihood Chi Square = NaN with prob < NaN
##
## Tucker Lewis Index of factoring reliability = NaN
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## PA1 PA2 PA3
## Correlation of (regression) scores with factors 1 1 1
## Multiple R square of scores with factors 1 1 1
## Minimum correlation of possible factor scores 1 1 1
# 4. Membuat tabel loading
load <- as.data.frame(unclass(fa_result$loadings))
load_round <- round(load, 3)
load_cut <- load_round
load_cut[abs(load_cut) < 0.5] <- ""
kable(load_cut,
caption = "Tabel 3. Factor Loading (>0,5) Setelah Rotasi Varimax")
Tabel 3. Factor Loading (>0,5) Setelah Rotasi
Varimax
| Area |
0.969 |
|
|
| Perimeter |
0.948 |
|
|
| MajorAxisLength |
0.884 |
|
|
| MinorAxisLength |
0.993 |
|
|
| AspectRation |
|
-0.975 |
|
| Eccentricity |
|
-0.951 |
|
| ConvexArea |
0.969 |
|
|
| EquivDiameter |
0.979 |
|
|
| Extent |
|
|
|
| Solidity |
|
|
1.227 |
| roundness |
|
0.72 |
|
| Compactness |
|
0.992 |
|
| ShapeFactor1 |
-0.922 |
|
|
| ShapeFactor2 |
-0.556 |
0.803 |
|
| ShapeFactor3 |
|
0.991 |
|
| ShapeFactor4 |
|
|
|