# Read data ----
# Load the feature table from the CSV file and preview its first rows.

data <- read.csv(file = "dehydration_processed_features.csv")
head(data, n = 6L)
##    HR_mean HRV_RMSSD  HRV_SDNN PPG_amp_var   ACC_mean      ACC_std   ACC_energy
## 1 109.4826 0.1879907 0.1356452   0.3968750   1.112556  0.079639205     1194.359
## 2 108.6019 0.1881000 0.1343460   0.2951184   1.191152  0.060386961     1365.591
## 3 108.6792 0.1451861 0.1170814   0.3388685   1.210696  0.047227912     1409.294
## 4 110.3448 0.1507120 0.1165618   0.3828620   1.215372  0.007706105     1418.100
## 5 110.1807 0.1945415 0.1409913   0.3153519   1.215035  0.007774005     1417.315
## 6 104.0087 0.1816147 0.1291816   0.2795354 255.915811 32.439335339 63883404.450
##     Temp_mean  Temp_slope participant condition
## 1 34438.64551 13.32576411          S1   fasting
## 2 34645.47154 14.41436143          S1   fasting
## 3 34814.89940  9.00731541          S1   fasting
## 4 34911.66021  4.67463538          S1   fasting
## 5 34975.55526  3.87500664          S1   fasting
## 6    35.69831  0.02893608          S1   fasting

# Keep numeric variables only ----
# This heading is a comment so that the script parses as valid R.

# vapply() always yields a logical vector (sapply() returns a list for a
# zero-column input), and drop = FALSE keeps the result a data frame even
# when only one numeric column is present.
data_num <- data[, vapply(data, is.numeric, logical(1)), drop = FALSE]
head(data_num)
##    HR_mean HRV_RMSSD  HRV_SDNN PPG_amp_var   ACC_mean      ACC_std   ACC_energy
## 1 109.4826 0.1879907 0.1356452   0.3968750   1.112556  0.079639205     1194.359
## 2 108.6019 0.1881000 0.1343460   0.2951184   1.191152  0.060386961     1365.591
## 3 108.6792 0.1451861 0.1170814   0.3388685   1.210696  0.047227912     1409.294
## 4 110.3448 0.1507120 0.1165618   0.3828620   1.215372  0.007706105     1418.100
## 5 110.1807 0.1945415 0.1409913   0.3153519   1.215035  0.007774005     1417.315
## 6 104.0087 0.1816147 0.1291816   0.2795354 255.915811 32.439335339 63883404.450
##     Temp_mean  Temp_slope
## 1 34438.64551 13.32576411
## 2 34645.47154 14.41436143
## 3 34814.89940  9.00731541
## 4 34911.66021  4.67463538
## 5 34975.55526  3.87500664
## 6    35.69831  0.02893608

# Remove NA ----
# This heading is a comment so that the script parses as valid R.

# Temp_mean is excluded from the analysis before the NA filter is applied.
# NOTE(review): the printed head of the data shows Temp_mean on very different
# scales (about 34438 in rows 1-5 versus 35.7 in row 6), which is presumably
# why it is dropped -- confirm.
data_clean <- subset(data_num, select = -Temp_mean)
# Keep complete rows only, so later correlations are not turned into NA.
data_clean <- na.omit(data_clean)
head(data_clean)
##    HR_mean HRV_RMSSD  HRV_SDNN PPG_amp_var   ACC_mean      ACC_std   ACC_energy
## 1 109.4826 0.1879907 0.1356452   0.3968750   1.112556  0.079639205     1194.359
## 2 108.6019 0.1881000 0.1343460   0.2951184   1.191152  0.060386961     1365.591
## 3 108.6792 0.1451861 0.1170814   0.3388685   1.210696  0.047227912     1409.294
## 4 110.3448 0.1507120 0.1165618   0.3828620   1.215372  0.007706105     1418.100
## 5 110.1807 0.1945415 0.1409913   0.3153519   1.215035  0.007774005     1417.315
## 6 104.0087 0.1816147 0.1291816   0.2795354 255.915811 32.439335339 63883404.450
##    Temp_slope
## 1 13.32576411
## 2 14.41436143
## 3  9.00731541
## 4  4.67463538
## 5  3.87500664
## 6  0.02893608

# Correlation matrix ----

# Pairwise Pearson correlations between all retained variables.
cor_matrix <- cor(x = data_clean, method = "pearson")
cor_matrix
##                 HR_mean   HRV_RMSSD    HRV_SDNN PPG_amp_var    ACC_mean
## HR_mean      1.00000000 -0.67533195 -0.78157656  -0.1331457 -0.03227821
## HRV_RMSSD   -0.67533195  1.00000000  0.89225823   0.1890452  0.03064209
## HRV_SDNN    -0.78157656  0.89225823  1.00000000   0.3095182  0.06817146
## PPG_amp_var -0.13314575  0.18904522  0.30951823   1.0000000  0.60363285
## ACC_mean    -0.03227821  0.03064209  0.06817146   0.6036329  1.00000000
## ACC_std     -0.04005273  0.09976204  0.14796437   0.6350773  0.85758125
## ACC_energy  -0.03536727  0.05391070  0.09999515   0.6276635  0.93327638
## Temp_slope   0.02801197 -0.11642909 -0.14545553  -0.4298691 -0.07872405
##                 ACC_std  ACC_energy  Temp_slope
## HR_mean     -0.04005273 -0.03536727  0.02801197
## HRV_RMSSD    0.09976204  0.05391070 -0.11642909
## HRV_SDNN     0.14796437  0.09999515 -0.14545553
## PPG_amp_var  0.63507726  0.62766349 -0.42986912
## ACC_mean     0.85758125  0.93327638 -0.07872405
## ACC_std      1.00000000  0.80400595 -0.19957730
## ACC_energy   0.80400595  1.00000000 -0.05864504
## Temp_slope  -0.19957730 -0.05864504  1.00000000

# Bartlett's test of sphericity ----

library(psych)

# n must be the number of observations the correlation matrix was computed
# from. cor_matrix comes from data_clean (after na.omit()), so use its row
# count; nrow(data_num) overstates n whenever rows with NA were removed.
cortest.bartlett(cor_matrix, n = nrow(data_clean))
## $chisq
## [1] 5953.53
## 
## $p.value
## [1] 0
## 
## $df
## [1] 28

# Kaiser-Meyer-Olkin measure of sampling adequacy ----

# Reports the overall MSA and the MSA of each variable.
KMO(r = cor_matrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = cor_matrix)
## Overall MSA =  0.7
## MSA for each item = 
##     HR_mean   HRV_RMSSD    HRV_SDNN PPG_amp_var    ACC_mean     ACC_std 
##        0.72        0.65        0.58        0.76        0.68        0.85 
##  ACC_energy  Temp_slope 
##        0.74        0.51

# Standardization ----

# Center every column and divide it by its standard deviation.
data_std <- scale(data_clean, center = TRUE, scale = TRUE)

# Principal component analysis ----

# The input was standardized beforehand, so no extra scaling is requested.
pca <- prcomp(x = data_std)
summary(pca)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     1.8623 1.5669 1.0412 0.58111 0.57836 0.43486 0.27760
## Proportion of Variance 0.4335 0.3069 0.1355 0.04221 0.04181 0.02364 0.00963
## Cumulative Proportion  0.4335 0.7404 0.8759 0.91813 0.95994 0.98358 0.99321
##                            PC8
## Standard deviation     0.23301
## Proportion of Variance 0.00679
## Cumulative Proportion  1.00000

# Eigenvalues ----

# Eigenvalues of the correlation matrix of the standardized data.
eigenvalue <- eigen(cor(data_std))[["values"]]
eigenvalue
## [1] 3.46815719 2.45518338 1.08401464 0.33768439 0.33450273 0.18910114 0.07706339
## [8] 0.05429313

# Scree plot ----

# Eigenvalue of each component, drawn as connected points.
plot(
  x = eigenvalue,
  type = "b",
  xlab = "Komponen",
  ylab = "Eigenvalue",
  main = "Scree Plot"
)
# Dashed red reference line at eigenvalue = 1.
abline(h = 1, lty = 2, col = "red")

# PCA loadings ----

# One row per variable, one column per principal component.
pca[["rotation"]]
##                    PC1         PC2         PC3         PC4          PC5
## HR_mean     -0.1897389  0.50769297 -0.16616768  0.56415717  0.546137823
## HRV_RMSSD    0.2226407 -0.52700710  0.05579930  0.25804143  0.526109050
## HRV_SDNN     0.2636306 -0.53343115  0.03155234  0.18550979  0.078776097
## PPG_amp_var  0.4352750  0.06962497 -0.30877583  0.60411321 -0.546558863
## ACC_mean     0.4564632  0.26233321  0.21240800 -0.21279789  0.080988220
## ACC_std      0.4603250  0.21204184  0.06686593 -0.16179512  0.291195410
## ACC_energy   0.4554662  0.24564422  0.22172729 -0.06194109  0.009785292
## Temp_slope  -0.1666063  0.03151826  0.87987810  0.37480309 -0.168999620
##                     PC6         PC7          PC8
## HR_mean      0.07443374  0.21143347  0.108862129
## HRV_RMSSD    0.16123365 -0.53555250 -0.115724608
## HRV_SDNN    -0.04773897  0.73929772  0.235573530
## PPG_amp_var -0.08958366 -0.19594385  0.015714841
## ACC_mean     0.23549241 -0.15791929  0.738558255
## ACC_std     -0.76346604  0.01358012 -0.211260144
## ACC_energy   0.54203281  0.23759415 -0.573466177
## Temp_slope  -0.16131474 -0.04416324  0.007239308

# Factor analysis ----

library(psych)

# Fit a 3-factor model with varimax rotation on the standardized data.
# NOTE(review): the result is stored under the name `fa`, the same name as the
# psych function being called -- consider renaming it in a coordinated change.
# NOTE(review): the run recorded below emitted an ultra-Heywood warning, so the
# solution should be examined before it is interpreted.
fa <- fa(r = data_std, nfactors = 3, rotate = "varimax")
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
# Print the fit summary of the fitted model.
summary(fa)
## 
## Factor analysis with Call: fa(r = data_std, nfactors = 3, rotate = "varimax")
## 
## Test of the hypothesis that 3 factors are sufficient.
## The degrees of freedom for the model is 7  and the objective function was  0.15 
## The number of observations was  843  with Chi Square =  126.45  with prob <  3.5e-24 
## 
## The root mean square of the residuals (RMSA) is  0.01 
## The df corrected root mean square of the residuals is  0.02 
## 
## Tucker Lewis Index of factoring reliability =  0.919
## RMSEA index =  0.142  and the 90 % confidence intervals are  0.121 0.165
## BIC =  79.29
# Communality of each variable under the fitted factor model.
# NOTE(review): in the recorded output HRV_SDNN has a communality above 1
# (1.0112863), which is worth checking against the warning from the fit.
fa[["communality"]]
##     HR_mean   HRV_RMSSD    HRV_SDNN PPG_amp_var    ACC_mean     ACC_std 
##   0.6109843   0.7752308   1.0112863   0.6935469   0.9701802   0.7679517 
##  ACC_energy  Temp_slope 
##   0.9048747   0.5351916