Library
library(psych)
library(corrplot)
library(tidyr)
library(dplyr)
library(ggplot2)
Persiapan Dataset
Baca Dataset
# Load the raw sensor readings; the path is relative to the working directory.
data <- utils::read.csv(file = "Occupancy_Estimation.csv")
# Preview the first six rows to confirm the file was parsed as expected.
head(data, n = 6L)
## Date Time S1_Temp S2_Temp S3_Temp S4_Temp S1_Light S2_Light
## 1 2017/12/22 10:49:41 24.94 24.75 24.56 25.38 121 34
## 2 2017/12/22 10:50:12 24.94 24.75 24.56 25.44 121 33
## 3 2017/12/22 10:50:42 25.00 24.75 24.50 25.44 121 34
## 4 2017/12/22 10:51:13 25.00 24.75 24.56 25.44 121 34
## 5 2017/12/22 10:51:44 25.00 24.75 24.56 25.44 121 34
## 6 2017/12/22 10:52:14 25.00 24.81 24.56 25.44 121 34
## S3_Light S4_Light S1_Sound S2_Sound S3_Sound S4_Sound S5_CO2 S5_CO2_Slope
## 1 53 40 0.08 0.19 0.06 0.06 390 0.7692308
## 2 53 40 0.93 0.05 0.06 0.06 390 0.6461538
## 3 53 40 0.43 0.11 0.08 0.06 390 0.5192308
## 4 53 40 0.41 0.10 0.10 0.09 390 0.3884615
## 5 54 40 0.18 0.06 0.06 0.06 390 0.2538462
## 6 54 40 0.13 0.06 0.06 0.07 390 0.1653846
## S6_PIR S7_PIR Room_Occupancy_Count
## 1 0 0 1
## 2 0 0 1
## 3 0 0 1
## 4 0 0 1
## 5 0 0 1
## 6 0 0 1
Cek Tipe Data
## 'data.frame': 10129 obs. of 19 variables:
## $ Date : chr "2017/12/22" "2017/12/22" "2017/12/22" "2017/12/22" ...
## $ Time : chr "10:49:41" "10:50:12" "10:50:42" "10:51:13" ...
## $ S1_Temp : num 24.9 24.9 25 25 25 ...
## $ S2_Temp : num 24.8 24.8 24.8 24.8 24.8 ...
## $ S3_Temp : num 24.6 24.6 24.5 24.6 24.6 ...
## $ S4_Temp : num 25.4 25.4 25.4 25.4 25.4 ...
## $ S1_Light : int 121 121 121 121 121 121 120 121 122 101 ...
## $ S2_Light : int 34 33 34 34 34 34 34 34 35 34 ...
## $ S3_Light : int 53 53 53 53 54 54 54 54 56 57 ...
## $ S4_Light : int 40 40 40 40 40 40 40 41 43 43 ...
## $ S1_Sound : num 0.08 0.93 0.43 0.41 0.18 0.13 1.39 0.09 0.09 3.84 ...
## $ S2_Sound : num 0.19 0.05 0.11 0.1 0.06 0.06 0.32 0.06 0.05 0.64 ...
## $ S3_Sound : num 0.06 0.06 0.08 0.1 0.06 0.06 0.43 0.09 0.06 0.48 ...
## $ S4_Sound : num 0.06 0.06 0.06 0.09 0.06 0.07 0.06 0.05 0.13 0.39 ...
## $ S5_CO2 : int 390 390 390 390 390 390 390 390 390 390 ...
## $ S5_CO2_Slope : num 0.769 0.646 0.519 0.388 0.254 ...
## $ S6_PIR : int 0 0 0 0 0 0 1 0 0 1 ...
## $ S7_PIR : int 0 0 0 0 0 0 0 0 0 1 ...
## $ Room_Occupancy_Count: int 1 1 1 1 1 1 1 1 1 1 ...
Preprocessing Data
Potong Variabel yang Tidak Numerik
# Keep only the 16 sensor columns used by the multivariate analysis.
# Columns are excluded by name instead of by position (previously 3:18) so the
# selection does not silently shift if the CSV column order changes, and so
# re-running this step on already-trimmed data is harmless.
# Besides the non-numeric Date and Time columns, the positional slice also
# left out Room_Occupancy_Count, so that column is excluded here as well.
excluded_cols <- c("Date", "Time", "Room_Occupancy_Count")
data <- data[, setdiff(names(data), excluded_cols), drop = FALSE]
head(data)
## S1_Temp S2_Temp S3_Temp S4_Temp S1_Light S2_Light S3_Light S4_Light S1_Sound
## 1 24.94 24.75 24.56 25.38 121 34 53 40 0.08
## 2 24.94 24.75 24.56 25.44 121 33 53 40 0.93
## 3 25.00 24.75 24.50 25.44 121 34 53 40 0.43
## 4 25.00 24.75 24.56 25.44 121 34 53 40 0.41
## 5 25.00 24.75 24.56 25.44 121 34 54 40 0.18
## 6 25.00 24.81 24.56 25.44 121 34 54 40 0.13
## S2_Sound S3_Sound S4_Sound S5_CO2 S5_CO2_Slope S6_PIR S7_PIR
## 1 0.19 0.06 0.06 390 0.7692308 0 0
## 2 0.05 0.06 0.06 390 0.6461538 0 0
## 3 0.11 0.08 0.06 390 0.5192308 0 0
## 4 0.10 0.10 0.09 390 0.3884615 0 0
## 5 0.06 0.06 0.06 390 0.2538462 0 0
## 6 0.06 0.06 0.07 390 0.1653846 0 0
Exploratory Data Analysis (EDA)
Statistika Deskriptif
## S1_Temp S2_Temp S3_Temp S4_Temp
## Min. :24.94 Min. :24.75 Min. :24.44 Min. :24.94
## 1st Qu.:25.19 1st Qu.:25.19 1st Qu.:24.69 1st Qu.:25.44
## Median :25.38 Median :25.38 Median :24.94 Median :25.75
## Mean :25.45 Mean :25.55 Mean :25.06 Mean :25.75
## 3rd Qu.:25.63 3rd Qu.:25.63 3rd Qu.:25.38 3rd Qu.:26.00
## Max. :26.38 Max. :29.00 Max. :26.19 Max. :26.56
## S1_Light S2_Light S3_Light S4_Light
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 0.00 Median : 0.00 Median : 0.00 Median : 0.00
## Mean : 25.45 Mean : 26.02 Mean : 34.25 Mean :13.22
## 3rd Qu.: 12.00 3rd Qu.: 14.00 3rd Qu.: 50.00 3rd Qu.:22.00
## Max. :165.00 Max. :258.00 Max. :280.00 Max. :74.00
## S1_Sound S2_Sound S3_Sound S4_Sound
## Min. :0.0600 Min. :0.0400 Min. :0.0400 Min. :0.0500
## 1st Qu.:0.0700 1st Qu.:0.0500 1st Qu.:0.0600 1st Qu.:0.0600
## Median :0.0800 Median :0.0500 Median :0.0600 Median :0.0800
## Mean :0.1682 Mean :0.1201 Mean :0.1581 Mean :0.1038
## 3rd Qu.:0.0800 3rd Qu.:0.0600 3rd Qu.:0.0700 3rd Qu.:0.1000
## Max. :3.8800 Max. :3.4400 Max. :3.6700 Max. :3.4000
## S5_CO2 S5_CO2_Slope S6_PIR S7_PIR
## Min. : 345.0 Min. :-6.29615 Min. :0.00000 Min. :0.00000
## 1st Qu.: 355.0 1st Qu.:-0.04615 1st Qu.:0.00000 1st Qu.:0.00000
## Median : 360.0 Median : 0.00000 Median :0.00000 Median :0.00000
## Mean : 460.9 Mean :-0.00483 Mean :0.09014 Mean :0.07957
## 3rd Qu.: 465.0 3rd Qu.: 0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1270.0 Max. : 8.98077 Max. :1.00000 Max. :1.00000
# Per-variable descriptive statistics from psych (n, mean, sd, median,
# trimmed mean, mad, min, max, range, skew, kurtosis, se).
desk <- psych::describe(x = data)
print(x = desk)
## vars n mean sd median trimmed mad min max
## S1_Temp 1 10129 25.45 0.35 25.38 25.41 0.28 24.94 26.38
## S2_Temp 2 10129 25.55 0.59 25.38 25.43 0.28 24.75 29.00
## S3_Temp 3 10129 25.06 0.43 24.94 25.02 0.46 24.44 26.19
## S4_Temp 4 10129 25.75 0.36 25.75 25.75 0.37 24.94 26.56
## S1_Light 5 10129 25.45 51.01 0.00 13.06 0.00 0.00 165.00
## S2_Light 6 10129 26.02 67.30 0.00 5.44 0.00 0.00 258.00
## S3_Light 7 10129 34.25 58.40 0.00 19.26 0.00 0.00 280.00
## S4_Light 8 10129 13.22 19.60 0.00 9.31 0.00 0.00 74.00
## S1_Sound 9 10129 0.17 0.32 0.08 0.09 0.01 0.06 3.88
## S2_Sound 10 10129 0.12 0.27 0.05 0.06 0.00 0.04 3.44
## S3_Sound 11 10129 0.16 0.41 0.06 0.07 0.00 0.04 3.67
## S4_Sound 12 10129 0.10 0.12 0.08 0.08 0.03 0.05 3.40
## S5_CO2 13 10129 460.86 199.96 360.00 412.29 14.83 345.00 1270.00
## S5_CO2_Slope 14 10129 0.00 1.16 0.00 -0.01 0.05 -6.30 8.98
## S6_PIR 15 10129 0.09 0.29 0.00 0.00 0.00 0.00 1.00
## S7_PIR 16 10129 0.08 0.27 0.00 0.00 0.00 0.00 1.00
## range skew kurtosis se
## S1_Temp 1.44 0.95 -0.16 0.00
## S2_Temp 4.25 2.35 6.50 0.01
## S3_Temp 1.75 0.65 -0.55 0.00
## S4_Temp 1.62 0.13 -0.93 0.00
## S1_Light 165.00 1.82 1.51 0.51
## S2_Light 258.00 2.83 6.18 0.67
## S3_Light 280.00 2.10 3.88 0.58
## S4_Light 74.00 1.36 0.57 0.19
## S1_Sound 3.82 5.45 39.39 0.00
## S2_Sound 3.40 6.88 62.43 0.00
## S3_Sound 3.63 5.99 39.45 0.00
## S4_Sound 3.35 10.95 211.02 0.00
## S5_CO2 925.00 1.98 3.00 1.99
## S5_CO2_Slope 15.28 0.29 7.40 0.01
## S6_PIR 1.00 2.86 6.19 0.00
## S7_PIR 1.00 3.11 7.65 0.00
Distribusi Data

Matriks Korelasi Data

Pengujian Prasyarat Principal Component Analysis dan Factor Analysis
Uji KMO dan MSA
# Kaiser-Meyer-Olkin factor adequacy: reports the overall MSA and the MSA of
# each individual variable.
kmo <- KMO(r = data)
print(x = kmo)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data)
## Overall MSA = 0.86
## MSA for each item =
## S1_Temp S2_Temp S3_Temp S4_Temp S1_Light S2_Light
## 0.78 0.90 0.84 0.91 0.81 0.85
## S3_Light S4_Light S1_Sound S2_Sound S3_Sound S4_Sound
## 0.87 0.64 0.95 0.90 0.90 0.90
## S5_CO2 S5_CO2_Slope S6_PIR S7_PIR
## 0.87 0.83 0.94 0.94
Uji Bartlett
# Bartlett's test on the data; the result is a list holding the chi-square
# statistic ($chisq), its p-value ($p.value) and the degrees of freedom ($df).
bartlett <- cortest.bartlett(R = data)
print(x = bartlett)
## $chisq
## [1] 159470.1
##
## $p.value
## [1] 0
##
## $df
## [1] 120
Principal Component Analysis (PCA)
# Unrotated PCA that keeps as many components as there are columns; it is
# fitted only to obtain the full set of eigenvalues for the scree plot.
pca_initial <- principal(r = data, nfactors = length(data), rotate = "none")

# Scree plot: eigenvalue against component index, points joined by lines.
plot(
  x = pca_initial$values,
  type = "b",
  main = "Scree Plot",
  xlab = "Jumlah Komponen",
  ylab = "Eigenvalue"
)
# Dashed red reference line at eigenvalue = 1 (the commonly used Kaiser cutoff).
abline(h = 1, lty = 2, col = "red")

# Final PCA: retain three components and apply a varimax rotation, then print
# the loadings, communalities (h2), uniquenesses (u2) and fit summary.
pca_final <- principal(r = data, nfactors = 3, rotate = "varimax")
print(x = pca_final)
## Principal Components Analysis
## Call: principal(r = data, nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## S1_Temp 0.93 0.27 0.13 0.95 0.052 1.2
## S2_Temp 0.77 0.28 0.29 0.75 0.246 1.6
## S3_Temp 0.93 0.19 0.15 0.93 0.074 1.1
## S4_Temp 0.86 0.10 0.23 0.80 0.203 1.2
## S1_Light 0.49 0.43 0.66 0.85 0.151 2.6
## S2_Light 0.39 0.42 0.65 0.76 0.244 2.4
## S3_Light 0.52 0.40 0.60 0.78 0.215 2.7
## S4_Light 0.18 -0.02 0.80 0.67 0.328 1.1
## S1_Sound 0.23 0.68 0.27 0.58 0.415 1.6
## S2_Sound 0.16 0.71 0.28 0.60 0.399 1.4
## S3_Sound 0.27 0.81 0.04 0.72 0.277 1.2
## S4_Sound 0.16 0.82 0.05 0.71 0.294 1.1
## S5_CO2 0.86 0.29 0.07 0.82 0.177 1.2
## S5_CO2_Slope -0.14 0.45 0.59 0.57 0.434 2.0
## S6_PIR 0.25 0.52 0.43 0.52 0.477 2.4
## S7_PIR 0.29 0.64 0.28 0.57 0.429 1.8
##
## RC1 RC2 RC3
## SS loadings 4.84 3.96 2.79
## Proportion Var 0.30 0.25 0.17
## Cumulative Var 0.30 0.55 0.72
## Proportion Explained 0.42 0.34 0.24
## Cumulative Proportion 0.42 0.76 1.00
##
## Mean item complexity = 1.7
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.04
## with the empirical chi square 3659.99 with prob < 0
##
## Fit based upon off diagonal values = 0.99
# Draw the loading diagram for the rotated three-component PCA solution.
fa.diagram(
  fa.results = pca_final,
  main = "Struktur Komponen PCA"
)

Factor Analysis (FA)
# Eigen-decomposition of the correlation matrix of the sensor columns; the
# eigenvalues are kept separately and printed.
ev <- eigen(x = cor(x = data))
eigen_values <- ev[["values"]]
print(x = eigen_values)
## [1] 8.36109943 2.02386891 1.20028936 0.81156103 0.64485047 0.53719836
## [7] 0.48004529 0.46427811 0.37483824 0.31462650 0.26759188 0.21239872
## [13] 0.12342348 0.09476426 0.06796424 0.02120171
# Three-factor maximum-likelihood factor analysis without rotation.
# stats::factanal() always estimates by maximum likelihood and has no `fm`
# argument (that option belongs to psych::fa()); the former `fm = "ml"` was
# swallowed by `...` and silently ignored, so it is removed here. The fitted
# model is unchanged; only the echoed call no longer shows the stray argument.
fa_without_rotation <- factanal(x = data, factors = 3, rotation = "none")
# Auto-print the uniquenesses, loadings and the sufficiency test.
fa_without_rotation
##
## Call:
## factanal(x = data, factors = 3, rotation = "none", fm = "ml")
##
## Uniquenesses:
## S1_Temp S2_Temp S3_Temp S4_Temp S1_Light S2_Light
## 0.014 0.302 0.062 0.187 0.163 0.275
## S3_Light S4_Light S1_Sound S2_Sound S3_Sound S4_Sound
## 0.235 0.005 0.533 0.557 0.549 0.620
## S5_CO2 S5_CO2_Slope S6_PIR S7_PIR
## 0.230 0.586 0.541 0.510
##
## Loadings:
## Factor1 Factor2 Factor3
## S1_Temp 0.896 0.427
## S2_Temp 0.640 0.532
## S3_Temp 0.816 0.498 -0.157
## S4_Temp 0.682 0.554 -0.204
## S1_Light 0.464 0.642 0.458
## S2_Light 0.369 0.567 0.517
## S3_Light 0.408 0.697 0.336
## S4_Light -0.227 0.971
## S1_Sound 0.321 0.384 0.465
## S2_Sound 0.272 0.383 0.472
## S3_Sound 0.388 0.273 0.476
## S4_Sound 0.290 0.281 0.465
## S5_CO2 0.806 0.342
## S5_CO2_Slope 0.245 0.592
## S6_PIR 0.311 0.413 0.438
## S7_PIR 0.402 0.328 0.470
##
## Factor1 Factor2 Factor3
## SS loadings 4.235 4.087 2.312
## Proportion Var 0.265 0.255 0.144
## Cumulative Var 0.265 0.520 0.665
##
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 22992.56 on 75 degrees of freedom.
## The p-value is 0
# Three-factor maximum-likelihood factor analysis with varimax rotation.
# stats::factanal() always estimates by maximum likelihood and has no `fm`
# argument (that option belongs to psych::fa()); the former `fm = "ml"` was
# swallowed by `...` and silently ignored, so it is removed here. The fitted
# model is unchanged; only the echoed call no longer shows the stray argument.
fa_rotation <- factanal(x = data, factors = 3, rotation = "varimax")
# Auto-print the uniquenesses, rotated loadings and the sufficiency test.
fa_rotation
##
## Call:
## factanal(x = data, factors = 3, rotation = "varimax", fm = "ml")
##
## Uniquenesses:
## S1_Temp S2_Temp S3_Temp S4_Temp S1_Light S2_Light
## 0.014 0.302 0.062 0.187 0.163 0.275
## S3_Light S4_Light S1_Sound S2_Sound S3_Sound S4_Sound
## 0.235 0.005 0.533 0.557 0.549 0.620
## S5_CO2 S5_CO2_Slope S6_PIR S7_PIR
## 0.230 0.586 0.541 0.510
##
## Loadings:
## Factor1 Factor2 Factor3
## S1_Temp 0.311 0.943
## S2_Temp 0.390 0.715 0.186
## S3_Temp 0.206 0.938 0.126
## S4_Temp 0.148 0.856 0.241
## S1_Light 0.731 0.469 0.287
## S2_Light 0.740 0.340 0.250
## S3_Light 0.623 0.481 0.382
## S4_Light 0.232 0.142 0.960
## S1_Sound 0.624 0.253 0.117
## S2_Sound 0.618 0.207 0.136
## S3_Sound 0.615 0.269
## S4_Sound 0.585 0.190
## S5_CO2 0.358 0.800
## S5_CO2_Slope 0.636
## S6_PIR 0.605 0.263 0.153
## S7_PIR 0.630 0.303
##
## Factor1 Factor2 Factor3
## SS loadings 4.623 4.622 1.388
## Proportion Var 0.289 0.289 0.087
## Cumulative Var 0.289 0.578 0.665
##
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 22992.56 on 75 degrees of freedom.
## The p-value is 0
# Communality of each variable = sum of its squared loadings across the
# rotated factors (row-wise over the loading matrix), shown to 3 decimals.
loadings_matrix <- as.matrix(fa_rotation[["loadings"]])
communalities <- rowSums(loadings_matrix * loadings_matrix)
round(x = communalities, digits = 3)
## S1_Temp S2_Temp S3_Temp S4_Temp S1_Light S2_Light
## 0.986 0.698 0.938 0.813 0.837 0.725
## S3_Light S4_Light S1_Sound S2_Sound S3_Sound S4_Sound
## 0.765 0.995 0.467 0.443 0.451 0.380
## S5_CO2 S5_CO2_Slope S6_PIR S7_PIR
## 0.770 0.414 0.459 0.490
# Refit the three-factor varimax model with psych::fa() using maximum
# likelihood factoring, so the result can be passed to fa.diagram().
fa_psy <- fa(
  r = data,
  nfactors = 3,
  rotate = "varimax",
  fm = "ml"
)
# Draw the loading diagram for this factor solution.
fa.diagram(fa.results = fa_psy, main = "Struktur Komponen FA")
