library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(e1071)
##
## Attaching package: 'e1071'
##
## The following object is masked from 'package:ggplot2':
##
## element
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Load the MetroPT3 dataset from CSV and preview the first rows.
dataset_file <- "MetroPT3_Dataset.csv"
data <- read.csv(file = dataset_file)
head(data)
## X timestamp TP2 TP3 H1 DV_pressure Reservoirs
## 1 0 2020-02-01 00:00:00 -0.012 9.358 9.340 -0.024 9.358
## 2 10 2020-02-01 00:00:10 -0.014 9.348 9.332 -0.022 9.348
## 3 20 2020-02-01 00:00:19 -0.012 9.338 9.322 -0.022 9.338
## 4 30 2020-02-01 00:00:29 -0.012 9.328 9.312 -0.022 9.328
## 5 40 2020-02-01 00:00:39 -0.012 9.318 9.302 -0.022 9.318
## 6 50 2020-02-01 00:00:49 -0.012 9.306 9.290 -0.024 9.308
## Oil_temperature Motor_current COMP DV_eletric Towers MPG LPS Pressure_switch
## 1 53.600 0.0400 1 0 1 1 0 1
## 2 53.675 0.0400 1 0 1 1 0 1
## 3 53.600 0.0425 1 0 1 1 0 1
## 4 53.425 0.0400 1 0 1 1 0 1
## 5 53.475 0.0400 1 0 1 1 0 1
## 6 53.500 0.0400 1 0 1 1 0 1
## Oil_level Caudal_impulses
## 1 1 1
## 2 1 1
## 3 1 1
## 4 1 1
## 5 1 1
## 6 1 1
# Inspect the structure of the data: dimensions plus the name and type of
# every column.
str(data)
## 'data.frame': 1516948 obs. of 17 variables:
## $ X : int 0 10 20 30 40 50 60 70 80 90 ...
## $ timestamp : chr "2020-02-01 00:00:00" "2020-02-01 00:00:10" "2020-02-01 00:00:19" "2020-02-01 00:00:29" ...
## $ TP2 : num -0.012 -0.014 -0.012 -0.012 -0.012 ...
## $ TP3 : num 9.36 9.35 9.34 9.33 9.32 ...
## $ H1 : num 9.34 9.33 9.32 9.31 9.3 ...
## $ DV_pressure : num -0.024 -0.022 -0.022 -0.022 -0.022 ...
## $ Reservoirs : num 9.36 9.35 9.34 9.33 9.32 ...
## $ Oil_temperature: num 53.6 53.7 53.6 53.4 53.5 ...
## $ Motor_current : num 0.04 0.04 0.0425 0.04 0.04 ...
## $ COMP : num 1 1 1 1 1 1 1 1 1 1 ...
## $ DV_eletric : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Towers : num 1 1 1 1 1 1 1 1 1 1 ...
## $ MPG : num 1 1 1 1 1 1 1 1 1 1 ...
## $ LPS : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Pressure_switch: num 1 1 1 1 1 1 1 1 1 1 ...
## $ Oil_level : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Caudal_impulses: num 1 1 1 1 1 1 1 1 1 1 ...
# Five-number summary and mean for every column of the raw data.
summary(data)
## X timestamp TP2 TP3
## Min. : 0 Length:1516948 Min. :-0.032 Min. : 0.730
## 1st Qu.: 3792368 Class :character 1st Qu.:-0.014 1st Qu.: 8.492
## Median : 7584735 Mode :character Median :-0.012 Median : 8.960
## Mean : 7584735 Mean : 1.368 Mean : 8.985
## 3rd Qu.:11377103 3rd Qu.:-0.010 3rd Qu.: 9.492
## Max. :15169470 Max. :10.676 Max. :10.302
## H1 DV_pressure Reservoirs Oil_temperature
## Min. :-0.036 Min. :-0.03200 Min. : 0.712 Min. :15.40
## 1st Qu.: 8.254 1st Qu.:-0.02200 1st Qu.: 8.494 1st Qu.:57.77
## Median : 8.784 Median :-0.02000 Median : 8.960 Median :62.70
## Mean : 7.568 Mean : 0.05596 Mean : 8.985 Mean :62.64
## 3rd Qu.: 9.374 3rd Qu.:-0.01800 3rd Qu.: 9.492 3rd Qu.:67.25
## Max. :10.288 Max. : 9.84400 Max. :10.300 Max. :89.05
## Motor_current COMP DV_eletric Towers
## Min. :0.020 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.040 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :0.045 Median :1.000 Median :0.0000 Median :1.0000
## Mean :2.050 Mean :0.837 Mean :0.1606 Mean :0.9198
## 3rd Qu.:3.808 3rd Qu.:1.000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :9.295 Max. :1.000 Max. :1.0000 Max. :1.0000
## MPG LPS Pressure_switch Oil_level
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:0.00000 1st Qu.:1.0000 1st Qu.:1.0000
## Median :1.0000 Median :0.00000 Median :1.0000 Median :1.0000
## Mean :0.8327 Mean :0.00342 Mean :0.9914 Mean :0.9042
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## Caudal_impulses
## Min. :0.0000
## 1st Qu.:1.0000
## Median :1.0000
## Mean :0.9371
## 3rd Qu.:1.0000
## Max. :1.0000
Semua variabel bertipe numerik (num) pada dasarnya dapat digunakan untuk PCA/FA. Variabel X dan timestamp tidak digunakan karena hanya berupa indeks baris dan penanda waktu pengamatan, bukan variabel hasil pengukuran.
DISTRIBUSI
Skew Kanan: mean > median
TP2 karena 1.368 > -0.012
DV_pressure karena 0.05596 > -0.02000
Motor_current karena 2.050 > 0.045
Cukup Simetris: mean ≈ median
TP3 karena 8.985 ≈ 8.960
Reservoirs karena 8.985 ≈ 8.960
Oil_temperature karena 62.64 ≈ 62.70
Skew Kiri: mean < median
H1 karena 7.568 < 8.784
Variabel COMP, MPG, Towers, LPS, Pressure_switch, Oil_level, Caudal_impulses, dan DV_eletric dihapus karena hanya bernilai 0 atau 1 (variabel biner) sehingga tidak informatif untuk PCA.
# Drop the columns that are not used in the PCA/FA: the row index, the
# timestamp, and the 0/1 indicator signals. "DV_eletric" is spelled exactly
# as the column appears in the dataset.
excluded_cols <- c(
  "X", "timestamp", "COMP", "DV_eletric", "Towers", "MPG",
  "LPS", "Pressure_switch", "Oil_level", "Caudal_impulses"
)
is_retained <- !(names(data) %in% excluded_cols)
data_bersih <- data[, is_retained]
head(data_bersih)
## TP2 TP3 H1 DV_pressure Reservoirs Oil_temperature Motor_current
## 1 -0.012 9.358 9.340 -0.024 9.358 53.600 0.0400
## 2 -0.014 9.348 9.332 -0.022 9.348 53.675 0.0400
## 3 -0.012 9.338 9.322 -0.022 9.338 53.600 0.0425
## 4 -0.012 9.328 9.312 -0.022 9.328 53.425 0.0400
## 5 -0.012 9.318 9.302 -0.022 9.318 53.475 0.0400
## 6 -0.012 9.306 9.290 -0.024 9.308 53.500 0.0400
# Count the missing values in each retained column.
na_flags <- is.na(data_bersih)
colSums(na_flags)
## TP2 TP3 H1 DV_pressure Reservoirs
## 0 0 0 0 0
## Oil_temperature Motor_current
## 0 0
Tidak ada variabel yang mengandung missing values.
# Confirm the structure of the cleaned data after the column removal.
str(data_bersih)
## 'data.frame': 1516948 obs. of 7 variables:
## $ TP2 : num -0.012 -0.014 -0.012 -0.012 -0.012 ...
## $ TP3 : num 9.36 9.35 9.34 9.33 9.32 ...
## $ H1 : num 9.34 9.33 9.32 9.31 9.3 ...
## $ DV_pressure : num -0.024 -0.022 -0.022 -0.022 -0.022 ...
## $ Reservoirs : num 9.36 9.35 9.34 9.33 9.32 ...
## $ Oil_temperature: num 53.6 53.7 53.6 53.4 53.5 ...
## $ Motor_current : num 0.04 0.04 0.0425 0.04 0.04 ...
# Descriptive statistics per variable (n, mean, sd, median, min, max, range,
# skew, kurtosis and standard error).
describe(data_bersih)
## vars n mean sd median min max range skew kurtosis
## TP2 1 1516948 1.37 3.25 -0.01 -0.03 10.68 10.71 1.99 2.06
## TP3 2 1516948 8.98 0.64 8.96 0.73 10.30 9.57 -0.88 7.40
## H1 3 1516948 7.57 3.33 8.78 -0.04 10.29 10.32 -1.76 1.30
## DV_pressure 4 1516948 0.06 0.38 -0.02 -0.03 9.84 9.88 5.72 38.71
## Reservoirs 5 1516948 8.99 0.64 8.96 0.71 10.30 9.59 -0.89 7.48
## Oil_temperature 6 1516948 62.64 6.52 62.70 15.40 89.05 73.65 -0.05 -0.06
## Motor_current 7 1516948 2.05 2.30 0.04 0.02 9.30 9.28 0.45 -1.45
## se
## TP2 0.00
## TP3 0.00
## H1 0.00
## DV_pressure 0.00
## Reservoirs 0.00
## Oil_temperature 0.01
## Motor_current 0.00
# Histograms of all variables, one panel per variable.
# The data are reshaped to long format (columns `name` and `value`) so that a
# single ggplot call can facet on the variable name; scales = "free" lets
# every panel use its own axis ranges.
ggplot(
  data = pivot_longer(data_bersih, cols = everything()),
  mapping = aes(x = value)
) +
  geom_histogram(bins = 30, fill = "skyblue", color = "black") +
  facet_wrap(~name, scales = "free")
# Skewness of every variable, returned as a named numeric vector.
# vapply() walks the data frame column by column and guarantees one numeric
# value per column; apply() would first coerce the whole data frame to a
# matrix, which is an unnecessary copy at this data size.
# NOTE(review): skewness() presumably resolves to e1071::skewness — confirm
# no other attached package masks it.
skew <- vapply(data_bersih, skewness, numeric(1))
skew
## TP2 TP3 H1 DV_pressure Reservoirs
## 1.98627512 -0.87734769 -1.76495665 5.72056092 -0.88615260
## Oil_temperature Motor_current
## -0.04579043 0.44856055
skew normal: -0.5 hingga 0.5
Oil_temperature
Motor_current
skew moderate: (-1 hingga -0.5) dan (0.5 hingga 1)
TP3
Reservoirs
skew tinggi: < -1 atau > 1
TP2
H1
DV_pressure
Tapi tidak perlu dilakukan transformasi karena dalam PCA maupun sebagian FA normalitas tidak begitu berpengaruh
# Standardize every column (scale() centers and scales by default), then
# compute the Kaiser-Meyer-Olkin measure of sampling adequacy, overall and
# per variable.
data_scaled <- scale(data_bersih)
KMO(data_scaled)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_scaled)
## Overall MSA = 0.66
## MSA for each item =
## TP2 TP3 H1 DV_pressure Reservoirs
## 0.63 0.57 0.62 0.78 0.57
## Oil_temperature Motor_current
## 0.77 0.88
Data memenuhi syarat untuk PCA dan FA karena nilai KMO keseluruhan (0.66) maupun MSA setiap variabel berada di atas 0.50. MSA TP3 dan Reservoirs (0.57) memang tergolong rendah (poor), tetapi masih dapat digunakan.
# Bartlett's test on the standardized data. Raw data (not a correlation
# matrix) is passed in, so the correlation matrix is derived from the data.
cortest.bartlett(data_scaled)
## R was not square, finding R from data
## $chisq
## [1] 25514007
##
## $p.value
## [1] 0
##
## $df
## [1] 21
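Hasil uji Bartlett memenuhi syarat, yaitu p-value < 0.05 (p-value ≈ 0), sehingga matriks korelasi bukan matriks identitas; artinya terdapat korelasi antarvariabel yang cukup untuk melanjutkan ke PCA/FA.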
# Principal component analysis on the standardized data, followed by the
# importance table (standard deviation and variance share per component).
# NOTE(review): data_scaled is already the output of scale(), so
# center = TRUE and scale. = TRUE look redundant here — confirm before
# simplifying the call.
pca_result <- prcomp(data_scaled, center = TRUE, scale. = TRUE)
summary(pca_result)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.739 1.5553 0.9402 0.67418 0.45123 0.12280 0.002344
## Proportion of Variance 0.432 0.3456 0.1263 0.06493 0.02909 0.00215 0.000000
## Cumulative Proportion 0.432 0.7775 0.9038 0.96876 0.99785 1.00000 1.000000
# Eigenvalues: the variance of each principal component, i.e. the square of
# the component standard deviations stored in the PCA result.
component_sd <- pca_result$sdev
eigenvalues <- component_sd^2
eigenvalues
## [1] 3.023767e+00 2.419002e+00 8.840140e-01 4.545223e-01 2.036101e-01
## [6] 1.507865e-02 5.496537e-06
# Importance table again (same call as the one right after the PCA fit).
summary(pca_result)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.739 1.5553 0.9402 0.67418 0.45123 0.12280 0.002344
## Proportion of Variance 0.432 0.3456 0.1263 0.06493 0.02909 0.00215 0.000000
## Cumulative Proportion 0.432 0.7775 0.9038 0.96876 0.99785 1.00000 1.000000
# Scree plot of the variance explained by each component.
# The PCA has already been fitted on data_scaled as pca_result; reuse that
# fit instead of running prcomp() a second time on the same 1.5M-row matrix.
# pca_res is kept as a name in case later code refers to it.
pca_res <- pca_result
fviz_eig(
  pca_res,
  addlabels = TRUE,  # print the percentage on top of each bar
  ylim = c(0, 100)   # y-axis spans the full 0-100 percent range
)
## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.
# Variable loadings (the rotation matrix of the PCA), shown to 3 decimals.
loadings <- pca_result[["rotation"]]
round(loadings, digits = 3)
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## TP2 0.488 -0.258 0.312 0.050 0.355 0.686 0.002
## TP3 0.207 0.583 0.109 0.262 0.171 -0.083 -0.707
## H1 -0.431 0.375 -0.288 0.034 -0.272 0.718 0.001
## DV_pressure 0.298 -0.246 -0.682 0.619 -0.049 -0.014 0.000
## Reservoirs 0.207 0.583 0.108 0.263 0.171 -0.086 0.707
## Oil_temperature 0.363 0.213 -0.553 -0.681 0.231 0.004 0.000
## Motor_current 0.520 0.081 0.159 -0.108 -0.828 0.025 0.001
# Biplot of the PCA result; repel = FALSE draws the labels without
# overlap avoidance.
fviz_pca_biplot(pca_result, repel = FALSE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
## Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Principal component scores of every observation, as a data frame
# (one column per component).
score_matrix <- pca_result[["x"]]
pc_scores <- as.data.frame(score_matrix)
head(pc_scores)
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 -1.214119 0.6751298 0.6123896 1.212834 0.3174788 -0.03220056 -0.0006678903
## 2 -1.214132 0.6572954 0.5995672 1.199904 0.3149584 -0.03173088 -0.0006839713
## 3 -1.222633 0.6353849 0.6037723 1.199333 0.3070742 -0.03083338 -0.0006882551
## 4 -1.238130 0.6101866 0.6159287 1.209415 0.2972249 -0.03046900 -0.0006899999
## 5 -1.240535 0.5924351 0.6091596 1.195870 0.2944542 -0.02994866 -0.0007023612
## 6 -1.246279 0.5731007 0.6079115 1.180857 0.2906804 -0.02953916 0.0015015571
# Determine the number of factors with parallel analysis; fa = "fa" requests
# the factor solution only (no component count is reported).
fa.parallel(data_scaled, fa = "fa")
## Parallel analysis suggests that the number of factors = 3 and the number of components = NA
# Fit a factor analysis with 3 factors (the number suggested by the parallel
# analysis) and a varimax rotation.
# NOTE(review): TP3 and Reservoirs have almost identical PCA loadings and the
# smallest PCA eigenvalue is about 5.5e-06, so the correlation matrix looks
# near-singular — consider keeping only one of the two and confirm the
# effect on the fit indices.
# NOTE(review): the pchisq "not converged" warnings raised by this call
# appear to come from the fit statistics at n of about 1.5 million — confirm
# they do not affect the loadings.
fa_res <- fa(data_scaled, nfactors = 3, rotate = "varimax")
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=1.06368e+07, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=1.01049e+07, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=9.85231e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=9.73232e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=8.59235e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=8.02237e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.73738e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.59489e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.59132e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.58963e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.58883e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.58119e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57737e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57547e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57542e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.5754e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57538e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57538e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57538e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=1.06368e+07, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=8.11054e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.81055e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.66806e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.60037e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.5843e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57666e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57575e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57554e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57544e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57539e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57536e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57536e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57536e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
# Full factor analysis report: loadings, communalities (h2), uniquenesses
# (u2), variance explained and fit statistics.
print(fa_res)
## Factor Analysis using method = minres
## Call: fa(r = data_scaled, nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 MR3 h2 u2 com
## TP2 0.98 0.02 0.12 0.98 0.0204 1.0
## TP3 -0.04 0.99 0.11 1.00 0.0021 1.0
## H1 -0.97 0.19 -0.10 0.99 0.0119 1.1
## DV_pressure 0.37 -0.18 0.38 0.31 0.6879 2.4
## Reservoirs -0.04 0.99 0.11 1.00 0.0023 1.0
## Oil_temperature 0.14 0.31 0.91 0.95 0.0492 1.3
## Motor_current 0.66 0.41 0.34 0.72 0.2773 2.2
##
## MR1 MR2 MR3
## SS loadings 2.51 2.30 1.14
## Proportion Var 0.36 0.33 0.16
## Cumulative Var 0.36 0.69 0.85
## Proportion Explained 0.42 0.39 0.19
## Cumulative Proportion 0.42 0.81 1.00
##
## Mean item complexity = 1.4
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 21 with the objective function = 16.82 with Chi Square = 25514007
## df of the model are 3 and the objective function was 5.01
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is 0.01
##
## The harmonic n.obs is 1516948 with the empirical chi square 166.41 with prob < 7.6e-36
## The total n.obs was 1516948 with Likelihood Chi Square = 7602880 with prob < 0
##
## Tucker Lewis Index of factoring reliability = -1.086
## RMSEA index = 1.293 and the 90 % confidence intervals are 1.29 NA
## BIC = 7602838
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1 MR2 MR3
## Correlation of (regression) scores with factors 1.00 1 0.97
## Multiple R square of scores with factors 0.99 1 0.94
## Minimum correlation of possible factor scores 0.98 1 0.88
# Factor loadings only; the printed table leaves small loadings blank.
fa_res$loadings
##
## Loadings:
## MR1 MR2 MR3
## TP2 0.982 0.124
## TP3 0.992 0.105
## H1 -0.970 0.193
## DV_pressure 0.372 -0.177 0.377
## Reservoirs 0.992 0.106
## Oil_temperature 0.136 0.314 0.913
## Motor_current 0.663 0.410 0.339
##
## MR1 MR2 MR3
## SS loadings 2.506 2.304 1.138
## Proportion Var 0.358 0.329 0.163
## Cumulative Var 0.358 0.687 0.850
# Visualize the factor structure as a diagram.
fa.diagram(fa_res)

# Export the principal component scores to CSV (row names are written as the
# first column, which is the write.csv default).
write.csv(x = pc_scores, file = "hasil_pca.csv")