1 Library

library(psych)
library(corrplot)
library(tidyr)
library(dplyr)
library(ggplot2)

2 Persiapan Dataset

2.1 Baca Dataset

# Read the CSV (path is relative to the current working directory) into a
# data frame and preview its first rows.
# NOTE(review): the name `data` is also the name of the utils::data() function;
# a more specific name would be clearer, but later steps all refer to `data`.
data <- read.csv("Occupancy_Estimation.csv")
head(data)
##         Date     Time S1_Temp S2_Temp S3_Temp S4_Temp S1_Light S2_Light
## 1 2017/12/22 10:49:41   24.94   24.75   24.56   25.38      121       34
## 2 2017/12/22 10:50:12   24.94   24.75   24.56   25.44      121       33
## 3 2017/12/22 10:50:42   25.00   24.75   24.50   25.44      121       34
## 4 2017/12/22 10:51:13   25.00   24.75   24.56   25.44      121       34
## 5 2017/12/22 10:51:44   25.00   24.75   24.56   25.44      121       34
## 6 2017/12/22 10:52:14   25.00   24.81   24.56   25.44      121       34
##   S3_Light S4_Light S1_Sound S2_Sound S3_Sound S4_Sound S5_CO2 S5_CO2_Slope
## 1       53       40     0.08     0.19     0.06     0.06    390    0.7692308
## 2       53       40     0.93     0.05     0.06     0.06    390    0.6461538
## 3       53       40     0.43     0.11     0.08     0.06    390    0.5192308
## 4       53       40     0.41     0.10     0.10     0.09    390    0.3884615
## 5       54       40     0.18     0.06     0.06     0.06    390    0.2538462
## 6       54       40     0.13     0.06     0.06     0.07    390    0.1653846
##   S6_PIR S7_PIR Room_Occupancy_Count
## 1      0      0                    1
## 2      0      0                    1
## 3      0      0                    1
## 4      0      0                    1
## 5      0      0                    1
## 6      0      0                    1

2.2 Cek Tipe Data

# Show the structure of the data frame: dimensions plus the type and first
# values of every column.
str(data)
## 'data.frame':    10129 obs. of  19 variables:
##  $ Date                : chr  "2017/12/22" "2017/12/22" "2017/12/22" "2017/12/22" ...
##  $ Time                : chr  "10:49:41" "10:50:12" "10:50:42" "10:51:13" ...
##  $ S1_Temp             : num  24.9 24.9 25 25 25 ...
##  $ S2_Temp             : num  24.8 24.8 24.8 24.8 24.8 ...
##  $ S3_Temp             : num  24.6 24.6 24.5 24.6 24.6 ...
##  $ S4_Temp             : num  25.4 25.4 25.4 25.4 25.4 ...
##  $ S1_Light            : int  121 121 121 121 121 121 120 121 122 101 ...
##  $ S2_Light            : int  34 33 34 34 34 34 34 34 35 34 ...
##  $ S3_Light            : int  53 53 53 53 54 54 54 54 56 57 ...
##  $ S4_Light            : int  40 40 40 40 40 40 40 41 43 43 ...
##  $ S1_Sound            : num  0.08 0.93 0.43 0.41 0.18 0.13 1.39 0.09 0.09 3.84 ...
##  $ S2_Sound            : num  0.19 0.05 0.11 0.1 0.06 0.06 0.32 0.06 0.05 0.64 ...
##  $ S3_Sound            : num  0.06 0.06 0.08 0.1 0.06 0.06 0.43 0.09 0.06 0.48 ...
##  $ S4_Sound            : num  0.06 0.06 0.06 0.09 0.06 0.07 0.06 0.05 0.13 0.39 ...
##  $ S5_CO2              : int  390 390 390 390 390 390 390 390 390 390 ...
##  $ S5_CO2_Slope        : num  0.769 0.646 0.519 0.388 0.254 ...
##  $ S6_PIR              : int  0 0 0 0 0 0 1 0 0 1 ...
##  $ S7_PIR              : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Room_Occupancy_Count: int  1 1 1 1 1 1 1 1 1 1 ...

3 Preprocessing Data

3.1 Cek Nilai Null

# Total number of missing cells across the whole data frame (0 = none missing).
sum(is.na(data))
## [1] 0

3.2 Potong Variabel Yang Tidak Numerik

# Keep only the sensor measurement columns used by the analysis below.
# Columns are excluded by name instead of by position (previously `3:18`), so
# the selection stays correct if the CSV column order changes, and re-running
# this step on the already-trimmed data is a no-op rather than an error.
# Date and Time are character columns; Room_Occupancy_Count is an integer
# column that the original `3:18` range also left out, so it stays excluded.
excluded_cols <- c("Date", "Time", "Room_Occupancy_Count")
data <- data[, setdiff(names(data), excluded_cols), drop = FALSE]
head(data)
##   S1_Temp S2_Temp S3_Temp S4_Temp S1_Light S2_Light S3_Light S4_Light S1_Sound
## 1   24.94   24.75   24.56   25.38      121       34       53       40     0.08
## 2   24.94   24.75   24.56   25.44      121       33       53       40     0.93
## 3   25.00   24.75   24.50   25.44      121       34       53       40     0.43
## 4   25.00   24.75   24.56   25.44      121       34       53       40     0.41
## 5   25.00   24.75   24.56   25.44      121       34       54       40     0.18
## 6   25.00   24.81   24.56   25.44      121       34       54       40     0.13
##   S2_Sound S3_Sound S4_Sound S5_CO2 S5_CO2_Slope S6_PIR S7_PIR
## 1     0.19     0.06     0.06    390    0.7692308      0      0
## 2     0.05     0.06     0.06    390    0.6461538      0      0
## 3     0.11     0.08     0.06    390    0.5192308      0      0
## 4     0.10     0.10     0.09    390    0.3884615      0      0
## 5     0.06     0.06     0.06    390    0.2538462      0      0
## 6     0.06     0.06     0.07    390    0.1653846      0      0

4 Exploratory Data Analysis (EDA)

4.1 Statistika Deskriptif

# Minimum, quartiles, median, mean and maximum for every column of `data`.
summary(data)
##     S1_Temp         S2_Temp         S3_Temp         S4_Temp     
##  Min.   :24.94   Min.   :24.75   Min.   :24.44   Min.   :24.94  
##  1st Qu.:25.19   1st Qu.:25.19   1st Qu.:24.69   1st Qu.:25.44  
##  Median :25.38   Median :25.38   Median :24.94   Median :25.75  
##  Mean   :25.45   Mean   :25.55   Mean   :25.06   Mean   :25.75  
##  3rd Qu.:25.63   3rd Qu.:25.63   3rd Qu.:25.38   3rd Qu.:26.00  
##  Max.   :26.38   Max.   :29.00   Max.   :26.19   Max.   :26.56  
##     S1_Light         S2_Light         S3_Light         S4_Light    
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   : 0.00  
##  1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.: 0.00  
##  Median :  0.00   Median :  0.00   Median :  0.00   Median : 0.00  
##  Mean   : 25.45   Mean   : 26.02   Mean   : 34.25   Mean   :13.22  
##  3rd Qu.: 12.00   3rd Qu.: 14.00   3rd Qu.: 50.00   3rd Qu.:22.00  
##  Max.   :165.00   Max.   :258.00   Max.   :280.00   Max.   :74.00  
##     S1_Sound         S2_Sound         S3_Sound         S4_Sound     
##  Min.   :0.0600   Min.   :0.0400   Min.   :0.0400   Min.   :0.0500  
##  1st Qu.:0.0700   1st Qu.:0.0500   1st Qu.:0.0600   1st Qu.:0.0600  
##  Median :0.0800   Median :0.0500   Median :0.0600   Median :0.0800  
##  Mean   :0.1682   Mean   :0.1201   Mean   :0.1581   Mean   :0.1038  
##  3rd Qu.:0.0800   3rd Qu.:0.0600   3rd Qu.:0.0700   3rd Qu.:0.1000  
##  Max.   :3.8800   Max.   :3.4400   Max.   :3.6700   Max.   :3.4000  
##      S5_CO2        S5_CO2_Slope          S6_PIR            S7_PIR       
##  Min.   : 345.0   Min.   :-6.29615   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.: 355.0   1st Qu.:-0.04615   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median : 360.0   Median : 0.00000   Median :0.00000   Median :0.00000  
##  Mean   : 460.9   Mean   :-0.00483   Mean   :0.09014   Mean   :0.07957  
##  3rd Qu.: 465.0   3rd Qu.: 0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1270.0   Max.   : 8.98077   Max.   :1.00000   Max.   :1.00000
# Extended per-column descriptive statistics from psych::describe()
# (n, mean, sd, median, trimmed mean, mad, range, skew, kurtosis, se),
# kept in `desk` and printed.
desk <- describe(data)
print(desk)
##              vars     n   mean     sd median trimmed   mad    min     max
## S1_Temp         1 10129  25.45   0.35  25.38   25.41  0.28  24.94   26.38
## S2_Temp         2 10129  25.55   0.59  25.38   25.43  0.28  24.75   29.00
## S3_Temp         3 10129  25.06   0.43  24.94   25.02  0.46  24.44   26.19
## S4_Temp         4 10129  25.75   0.36  25.75   25.75  0.37  24.94   26.56
## S1_Light        5 10129  25.45  51.01   0.00   13.06  0.00   0.00  165.00
## S2_Light        6 10129  26.02  67.30   0.00    5.44  0.00   0.00  258.00
## S3_Light        7 10129  34.25  58.40   0.00   19.26  0.00   0.00  280.00
## S4_Light        8 10129  13.22  19.60   0.00    9.31  0.00   0.00   74.00
## S1_Sound        9 10129   0.17   0.32   0.08    0.09  0.01   0.06    3.88
## S2_Sound       10 10129   0.12   0.27   0.05    0.06  0.00   0.04    3.44
## S3_Sound       11 10129   0.16   0.41   0.06    0.07  0.00   0.04    3.67
## S4_Sound       12 10129   0.10   0.12   0.08    0.08  0.03   0.05    3.40
## S5_CO2         13 10129 460.86 199.96 360.00  412.29 14.83 345.00 1270.00
## S5_CO2_Slope   14 10129   0.00   1.16   0.00   -0.01  0.05  -6.30    8.98
## S6_PIR         15 10129   0.09   0.29   0.00    0.00  0.00   0.00    1.00
## S7_PIR         16 10129   0.08   0.27   0.00    0.00  0.00   0.00    1.00
##               range  skew kurtosis   se
## S1_Temp        1.44  0.95    -0.16 0.00
## S2_Temp        4.25  2.35     6.50 0.01
## S3_Temp        1.75  0.65    -0.55 0.00
## S4_Temp        1.62  0.13    -0.93 0.00
## S1_Light     165.00  1.82     1.51 0.51
## S2_Light     258.00  2.83     6.18 0.67
## S3_Light     280.00  2.10     3.88 0.58
## S4_Light      74.00  1.36     0.57 0.19
## S1_Sound       3.82  5.45    39.39 0.00
## S2_Sound       3.40  6.88    62.43 0.00
## S3_Sound       3.63  5.99    39.45 0.00
## S4_Sound       3.35 10.95   211.02 0.00
## S5_CO2       925.00  1.98     3.00 1.99
## S5_CO2_Slope  15.28  0.29     7.40 0.01
## S6_PIR         1.00  2.86     6.19 0.00
## S7_PIR         1.00  3.11     7.65 0.00

4.2 Distribusi Data

4.3 Matriks Korelasi Data

5 Pengujian Prasyarat Principal Component Analysis dan Factor Analysis

5.1 Uji KMO dan MSA

# Kaiser-Meyer-Olkin measure of sampling adequacy: prints the overall MSA and
# one MSA value per variable.
kmo <- KMO(data)
print(kmo)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data)
## Overall MSA =  0.86
## MSA for each item = 
##      S1_Temp      S2_Temp      S3_Temp      S4_Temp     S1_Light     S2_Light 
##         0.78         0.90         0.84         0.91         0.81         0.85 
##     S3_Light     S4_Light     S1_Sound     S2_Sound     S3_Sound     S4_Sound 
##         0.87         0.64         0.95         0.90         0.90         0.90 
##       S5_CO2 S5_CO2_Slope       S6_PIR       S7_PIR 
##         0.87         0.83         0.94         0.94

5.2 Uji Bartlett

# Bartlett's test that the correlation matrix is an identity matrix.
# cortest.bartlett() is documented to take a correlation matrix and the sample
# size; when handed raw data it falls back to detecting the non-square input
# and deriving both itself (pairwise correlations, n = number of rows).
# Passing them explicitly gives the same chi-square, p-value and df without
# relying on that fallback.
bartlett <- cortest.bartlett(
  cor(data, use = "pairwise.complete.obs"),
  n = nrow(data)
)
print(bartlett)
## $chisq
## [1] 159470.1
## 
## $p.value
## [1] 0
## 
## $df
## [1] 120

6 Principal Component Analysis (PCA)

# Unrotated PCA extracting as many components as there are variables, used
# only to obtain the full set of eigenvalues for the scree plot.
# ncol() states the intent directly; length() happens to give the column count
# for a data frame but would give rows * columns if `data` were a matrix.
pca_initial <- principal(data, nfactors = ncol(data), rotate = "none")

# Scree plot of the eigenvalues; the dashed red line at 1 marks the
# eigenvalue-greater-than-one cut-off.
plot(
  pca_initial$values,
  type = "b",
  main = "Scree Plot",
  xlab = "Jumlah Komponen",
  ylab = "Eigenvalue"
)
abline(h = 1, col = "red", lty = 2)

# Final solution: three components with varimax rotation.
pca_final <- principal(data, nfactors = 3, rotate = "varimax")
print(pca_final)
## Principal Components Analysis
## Call: principal(r = data, nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                RC1   RC2  RC3   h2    u2 com
## S1_Temp       0.93  0.27 0.13 0.95 0.052 1.2
## S2_Temp       0.77  0.28 0.29 0.75 0.246 1.6
## S3_Temp       0.93  0.19 0.15 0.93 0.074 1.1
## S4_Temp       0.86  0.10 0.23 0.80 0.203 1.2
## S1_Light      0.49  0.43 0.66 0.85 0.151 2.6
## S2_Light      0.39  0.42 0.65 0.76 0.244 2.4
## S3_Light      0.52  0.40 0.60 0.78 0.215 2.7
## S4_Light      0.18 -0.02 0.80 0.67 0.328 1.1
## S1_Sound      0.23  0.68 0.27 0.58 0.415 1.6
## S2_Sound      0.16  0.71 0.28 0.60 0.399 1.4
## S3_Sound      0.27  0.81 0.04 0.72 0.277 1.2
## S4_Sound      0.16  0.82 0.05 0.71 0.294 1.1
## S5_CO2        0.86  0.29 0.07 0.82 0.177 1.2
## S5_CO2_Slope -0.14  0.45 0.59 0.57 0.434 2.0
## S6_PIR        0.25  0.52 0.43 0.52 0.477 2.4
## S7_PIR        0.29  0.64 0.28 0.57 0.429 1.8
## 
##                        RC1  RC2  RC3
## SS loadings           4.84 3.96 2.79
## Proportion Var        0.30 0.25 0.17
## Cumulative Var        0.30 0.55 0.72
## Proportion Explained  0.42 0.34 0.24
## Cumulative Proportion 0.42 0.76 1.00
## 
## Mean item complexity =  1.7
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.04 
##  with the empirical chi square  3659.99  with prob <  0 
## 
## Fit based upon off diagonal values = 0.99
# Draw the loading diagram for the rotated three-component PCA solution.
fa.diagram(pca_final, main = "Struktur Komponen PCA")

7 Factor Analysis (FA)

# Eigen-decomposition of the correlation matrix of `data`; the eigenvalues are
# pulled out of the result and printed.
ev <- eigen(cor(data))
eigen_values <- ev[["values"]]
print(eigen_values)
##  [1] 8.36109943 2.02386891 1.20028936 0.81156103 0.64485047 0.53719836
##  [7] 0.48004529 0.46427811 0.37483824 0.31462650 0.26759188 0.21239872
## [13] 0.12342348 0.09476426 0.06796424 0.02120171
# Three-factor maximum-likelihood factor analysis without rotation.
# factanal() always fits by maximum likelihood and has no `fm` parameter; the
# former `fm = "ml"` (an argument of psych::fa) was absorbed by `...` and had
# no effect, so it is dropped. The fitted model is unchanged.
# NOTE(review): in the output below S4_Light's uniqueness is 0.005, which is
# factanal's default lower bound -- this may indicate a boundary (Heywood-type)
# solution; worth confirming.
fa_without_rotation <- factanal(x = data, factors = 3, rotation = "none")
fa_without_rotation
## 
## Call:
## factanal(x = data, factors = 3, rotation = "none")
## 
## Uniquenesses:
##      S1_Temp      S2_Temp      S3_Temp      S4_Temp     S1_Light     S2_Light 
##        0.014        0.302        0.062        0.187        0.163        0.275 
##     S3_Light     S4_Light     S1_Sound     S2_Sound     S3_Sound     S4_Sound 
##        0.235        0.005        0.533        0.557        0.549        0.620 
##       S5_CO2 S5_CO2_Slope       S6_PIR       S7_PIR 
##        0.230        0.586        0.541        0.510 
## 
## Loadings:
##              Factor1 Factor2 Factor3
## S1_Temp       0.896   0.427         
## S2_Temp       0.640   0.532         
## S3_Temp       0.816   0.498  -0.157 
## S4_Temp       0.682   0.554  -0.204 
## S1_Light      0.464   0.642   0.458 
## S2_Light      0.369   0.567   0.517 
## S3_Light      0.408   0.697   0.336 
## S4_Light     -0.227   0.971         
## S1_Sound      0.321   0.384   0.465 
## S2_Sound      0.272   0.383   0.472 
## S3_Sound      0.388   0.273   0.476 
## S4_Sound      0.290   0.281   0.465 
## S5_CO2        0.806   0.342         
## S5_CO2_Slope          0.245   0.592 
## S6_PIR        0.311   0.413   0.438 
## S7_PIR        0.402   0.328   0.470 
## 
##                Factor1 Factor2 Factor3
## SS loadings      4.235   4.087   2.312
## Proportion Var   0.265   0.255   0.144
## Cumulative Var   0.265   0.520   0.665
## 
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 22992.56 on 75 degrees of freedom.
## The p-value is 0
# Three-factor maximum-likelihood factor analysis with varimax rotation.
# factanal() always fits by maximum likelihood and has no `fm` parameter; the
# former `fm = "ml"` (an argument of psych::fa) was absorbed by `...` and had
# no effect, so it is dropped. The fitted model is unchanged.
fa_rotation <- factanal(x = data, factors = 3, rotation = "varimax")
fa_rotation
## 
## Call:
## factanal(x = data, factors = 3, rotation = "varimax")
## 
## Uniquenesses:
##      S1_Temp      S2_Temp      S3_Temp      S4_Temp     S1_Light     S2_Light 
##        0.014        0.302        0.062        0.187        0.163        0.275 
##     S3_Light     S4_Light     S1_Sound     S2_Sound     S3_Sound     S4_Sound 
##        0.235        0.005        0.533        0.557        0.549        0.620 
##       S5_CO2 S5_CO2_Slope       S6_PIR       S7_PIR 
##        0.230        0.586        0.541        0.510 
## 
## Loadings:
##              Factor1 Factor2 Factor3
## S1_Temp       0.311   0.943         
## S2_Temp       0.390   0.715   0.186 
## S3_Temp       0.206   0.938   0.126 
## S4_Temp       0.148   0.856   0.241 
## S1_Light      0.731   0.469   0.287 
## S2_Light      0.740   0.340   0.250 
## S3_Light      0.623   0.481   0.382 
## S4_Light      0.232   0.142   0.960 
## S1_Sound      0.624   0.253   0.117 
## S2_Sound      0.618   0.207   0.136 
## S3_Sound      0.615   0.269         
## S4_Sound      0.585   0.190         
## S5_CO2        0.358   0.800         
## S5_CO2_Slope  0.636                 
## S6_PIR        0.605   0.263   0.153 
## S7_PIR        0.630   0.303         
## 
##                Factor1 Factor2 Factor3
## SS loadings      4.623   4.622   1.388
## Proportion Var   0.289   0.289   0.087
## Cumulative Var   0.289   0.578   0.665
## 
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 22992.56 on 75 degrees of freedom.
## The p-value is 0
# Communality of each variable: the sum of its squared loadings across the
# three rotated factors, shown to three decimals.
loadings_matrix <- as.matrix(x = fa_rotation$loadings)
communalities <- rowSums(x = loadings_matrix^2)
round(x = communalities, digits = 3)
##      S1_Temp      S2_Temp      S3_Temp      S4_Temp     S1_Light     S2_Light 
##        0.986        0.698        0.938        0.813        0.837        0.725 
##     S3_Light     S4_Light     S1_Sound     S2_Sound     S3_Sound     S4_Sound 
##        0.765        0.995        0.467        0.443        0.451        0.380 
##       S5_CO2 S5_CO2_Slope       S6_PIR       S7_PIR 
##        0.770        0.414        0.459        0.490
# Fit a three-factor, varimax-rotated maximum-likelihood model with psych::fa()
# and draw its loading diagram.
# NOTE(review): the loadings printed earlier come from factanal(), while this
# diagram is drawn from a separate psych::fa() fit -- presumably because
# fa.diagram() expects a psych fit object; confirm the two solutions agree.
fa_psy <- fa(
  r = data,
  nfactors = 3,
  rotate = "varimax",
  fm = "ml"
)
fa.diagram(fa_psy, main = "Struktur Komponen FA")