Load Library

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(e1071)
## 
## Attaching package: 'e1071'
## 
## The following object is masked from 'package:ggplot2':
## 
##     element
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

Load Data

# Read the MetroPT3 CSV from the working directory and preview the first rows
data <- read.csv(file = "MetroPT3_Dataset.csv")
head(x = data)
##    X           timestamp    TP2   TP3    H1 DV_pressure Reservoirs
## 1  0 2020-02-01 00:00:00 -0.012 9.358 9.340      -0.024      9.358
## 2 10 2020-02-01 00:00:10 -0.014 9.348 9.332      -0.022      9.348
## 3 20 2020-02-01 00:00:19 -0.012 9.338 9.322      -0.022      9.338
## 4 30 2020-02-01 00:00:29 -0.012 9.328 9.312      -0.022      9.328
## 5 40 2020-02-01 00:00:39 -0.012 9.318 9.302      -0.022      9.318
## 6 50 2020-02-01 00:00:49 -0.012 9.306 9.290      -0.024      9.308
##   Oil_temperature Motor_current COMP DV_eletric Towers MPG LPS Pressure_switch
## 1          53.600        0.0400    1          0      1   1   0               1
## 2          53.675        0.0400    1          0      1   1   0               1
## 3          53.600        0.0425    1          0      1   1   0               1
## 4          53.425        0.0400    1          0      1   1   0               1
## 5          53.475        0.0400    1          0      1   1   0               1
## 6          53.500        0.0400    1          0      1   1   0               1
##   Oil_level Caudal_impulses
## 1         1               1
## 2         1               1
## 3         1               1
## 4         1               1
## 5         1               1
## 6         1               1

Data Cleaning

# Inspect the column types and a preview of the values in the raw data
str(object = data)
## 'data.frame':    1516948 obs. of  17 variables:
##  $ X              : int  0 10 20 30 40 50 60 70 80 90 ...
##  $ timestamp      : chr  "2020-02-01 00:00:00" "2020-02-01 00:00:10" "2020-02-01 00:00:19" "2020-02-01 00:00:29" ...
##  $ TP2            : num  -0.012 -0.014 -0.012 -0.012 -0.012 ...
##  $ TP3            : num  9.36 9.35 9.34 9.33 9.32 ...
##  $ H1             : num  9.34 9.33 9.32 9.31 9.3 ...
##  $ DV_pressure    : num  -0.024 -0.022 -0.022 -0.022 -0.022 ...
##  $ Reservoirs     : num  9.36 9.35 9.34 9.33 9.32 ...
##  $ Oil_temperature: num  53.6 53.7 53.6 53.4 53.5 ...
##  $ Motor_current  : num  0.04 0.04 0.0425 0.04 0.04 ...
##  $ COMP           : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ DV_eletric     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Towers         : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ MPG            : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ LPS            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Pressure_switch: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Oil_level      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Caudal_impulses: num  1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary statistics of the raw data
summary(object = data)
##        X             timestamp              TP2              TP3        
##  Min.   :       0   Length:1516948     Min.   :-0.032   Min.   : 0.730  
##  1st Qu.: 3792368   Class :character   1st Qu.:-0.014   1st Qu.: 8.492  
##  Median : 7584735   Mode  :character   Median :-0.012   Median : 8.960  
##  Mean   : 7584735                      Mean   : 1.368   Mean   : 8.985  
##  3rd Qu.:11377103                      3rd Qu.:-0.010   3rd Qu.: 9.492  
##  Max.   :15169470                      Max.   :10.676   Max.   :10.302  
##        H1          DV_pressure         Reservoirs     Oil_temperature
##  Min.   :-0.036   Min.   :-0.03200   Min.   : 0.712   Min.   :15.40  
##  1st Qu.: 8.254   1st Qu.:-0.02200   1st Qu.: 8.494   1st Qu.:57.77  
##  Median : 8.784   Median :-0.02000   Median : 8.960   Median :62.70  
##  Mean   : 7.568   Mean   : 0.05596   Mean   : 8.985   Mean   :62.64  
##  3rd Qu.: 9.374   3rd Qu.:-0.01800   3rd Qu.: 9.492   3rd Qu.:67.25  
##  Max.   :10.288   Max.   : 9.84400   Max.   :10.300   Max.   :89.05  
##  Motor_current        COMP         DV_eletric         Towers      
##  Min.   :0.020   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.040   1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:1.0000  
##  Median :0.045   Median :1.000   Median :0.0000   Median :1.0000  
##  Mean   :2.050   Mean   :0.837   Mean   :0.1606   Mean   :0.9198  
##  3rd Qu.:3.808   3rd Qu.:1.000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :9.295   Max.   :1.000   Max.   :1.0000   Max.   :1.0000  
##       MPG              LPS          Pressure_switch    Oil_level     
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:1.0000   1st Qu.:0.00000   1st Qu.:1.0000   1st Qu.:1.0000  
##  Median :1.0000   Median :0.00000   Median :1.0000   Median :1.0000  
##  Mean   :0.8327   Mean   :0.00342   Mean   :0.9914   Mean   :0.9042  
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.00000   Max.   :1.0000   Max.   :1.0000  
##  Caudal_impulses 
##  Min.   :0.0000  
##  1st Qu.:1.0000  
##  Median :1.0000  
##  Mean   :0.9371  
##  3rd Qu.:1.0000  
##  Max.   :1.0000

Secara tipe data, semua variabel bertipe num dapat digunakan untuk PCA/FA. Variabel X dan timestamp tidak digunakan karena X hanyalah indeks baris dan timestamp adalah penanda waktu (bertipe chr), sehingga keduanya bukan variabel pengukuran.

DISTRIBUSI

Skew Kanan: mean > median

Cukup Simetris: mean ≈ median

Skew Kiri : mean < median

Variabel COMP, MPG, Towers, LPS, Pressure_switch, Oil_level, Caudal_impulses, dan DV_eletric dihapus karena merupakan variabel biner (hanya bernilai 0 atau 1) sehingga tidak informatif untuk PCA.

# Drop the row index, the timestamp and the 0/1 columns, leaving the seven
# remaining numeric columns for the analysis
data_bersih <- data[, setdiff(
  names(data),
  c(
    "X", "timestamp", "COMP", "DV_eletric", "Towers", "MPG",
    "LPS", "Pressure_switch", "Oil_level",
    "Caudal_impulses"
  )
)]
head(x = data_bersih)
##      TP2   TP3    H1 DV_pressure Reservoirs Oil_temperature Motor_current
## 1 -0.012 9.358 9.340      -0.024      9.358          53.600        0.0400
## 2 -0.014 9.348 9.332      -0.022      9.348          53.675        0.0400
## 3 -0.012 9.338 9.322      -0.022      9.338          53.600        0.0425
## 4 -0.012 9.328 9.312      -0.022      9.328          53.425        0.0400
## 5 -0.012 9.318 9.302      -0.022      9.318          53.475        0.0400
## 6 -0.012 9.306 9.290      -0.024      9.308          53.500        0.0400
# Count the missing values in each column
data_bersih %>%
  is.na() %>%
  colSums()
##             TP2             TP3              H1     DV_pressure      Reservoirs 
##               0               0               0               0               0 
## Oil_temperature   Motor_current 
##               0               0

tidak ada variabel yang mengandung missing values

# Confirm the structure of the reduced data set
str(object = data_bersih)
## 'data.frame':    1516948 obs. of  7 variables:
##  $ TP2            : num  -0.012 -0.014 -0.012 -0.012 -0.012 ...
##  $ TP3            : num  9.36 9.35 9.34 9.33 9.32 ...
##  $ H1             : num  9.34 9.33 9.32 9.31 9.3 ...
##  $ DV_pressure    : num  -0.024 -0.022 -0.022 -0.022 -0.022 ...
##  $ Reservoirs     : num  9.36 9.35 9.34 9.33 9.32 ...
##  $ Oil_temperature: num  53.6 53.7 53.6 53.4 53.5 ...
##  $ Motor_current  : num  0.04 0.04 0.0425 0.04 0.04 ...
# Descriptive statistics (mean, sd, median, range, skew, kurtosis, se)
describe(x = data_bersih)
##                 vars       n  mean   sd median   min   max range  skew kurtosis
## TP2                1 1516948  1.37 3.25  -0.01 -0.03 10.68 10.71  1.99     2.06
## TP3                2 1516948  8.98 0.64   8.96  0.73 10.30  9.57 -0.88     7.40
## H1                 3 1516948  7.57 3.33   8.78 -0.04 10.29 10.32 -1.76     1.30
## DV_pressure        4 1516948  0.06 0.38  -0.02 -0.03  9.84  9.88  5.72    38.71
## Reservoirs         5 1516948  8.99 0.64   8.96  0.71 10.30  9.59 -0.89     7.48
## Oil_temperature    6 1516948 62.64 6.52  62.70 15.40 89.05 73.65 -0.05    -0.06
## Motor_current      7 1516948  2.05 2.30   0.04  0.02  9.30  9.28  0.45    -1.45
##                   se
## TP2             0.00
## TP3             0.00
## H1              0.00
## DV_pressure     0.00
## Reservoirs      0.00
## Oil_temperature 0.01
## Motor_current   0.00
# Histograms of every variable: one panel per variable, each with its own axes
ggplot(
  data = pivot_longer(data_bersih, cols = everything()),
  mapping = aes(x = value)
) +
  geom_histogram(bins = 30, fill = "skyblue", color = "black") +
  facet_wrap(vars(name), scales = "free")

# Skewness of each variable.
# vapply() walks the data frame column by column, so the frame is not first
# coerced to a matrix as apply() would do, and the result is guaranteed to be
# exactly one number per column (a named numeric vector).
skew <- vapply(data_bersih, skewness, numeric(1))
skew
##             TP2             TP3              H1     DV_pressure      Reservoirs 
##      1.98627512     -0.87734769     -1.76495665      5.72056092     -0.88615260 
## Oil_temperature   Motor_current 
##     -0.04579043      0.44856055

skew normal: -0.5 hingga 0.5

skew moderate: (-1 hingga -0.5) dan (0.5 hingga 1)

skew tinggi: < -1 atau > 1

Namun, transformasi tidak perlu dilakukan karena PCA maupun sebagian metode FA tidak terlalu bergantung pada asumsi normalitas.

Standardisasi

# Standardize every column (subtract the mean, divide by the standard deviation)
data_scaled <- scale(x = data_bersih, center = TRUE, scale = TRUE)

Uji Asumsi

Uji KMO

# Kaiser-Meyer-Olkin measure of sampling adequacy, overall and per variable
KMO(r = data_scaled)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_scaled)
## Overall MSA =  0.66
## MSA for each item = 
##             TP2             TP3              H1     DV_pressure      Reservoirs 
##            0.63            0.57            0.62            0.78            0.57 
## Oil_temperature   Motor_current 
##            0.77            0.88

Semua variabel memenuhi syarat untuk PCA dan FA karena nilai MSA keseluruhan (0.66) maupun MSA setiap variabel berada di atas 0.50. TP3 dan Reservoirs (MSA = 0.57) memang termasuk kategori rendah, tetapi masih dapat digunakan.

Bartlett Test

# Bartlett's test of sphericity.
# cortest.bartlett() expects a correlation matrix; handing it the raw data only
# works through a fallback that prints "R was not square, finding R from data".
# Passing the correlation matrix together with the sample size makes the
# inputs explicit and yields the same test.
cortest.bartlett(cor(data_scaled), n = nrow(data_scaled))
## R was not square, finding R from data
## $chisq
## [1] 25514007
## 
## $p.value
## [1] 0
## 
## $df
## [1] 21

Nilai p-value yang dihasilkan memenuhi syarat (p-value < 0.05), sehingga matriks korelasi bukan matriks identitas; artinya terdapat korelasi antarvariabel yang cukup untuk melanjutkan ke PCA/FA.

PCA

# Principal component analysis on the standardized matrix. The input has
# already been centred and scaled, so the centring/scaling requested here
# leaves it essentially unchanged.
pca_result <- prcomp(x = data_scaled, center = TRUE, scale. = TRUE)
summary(object = pca_result)
## Importance of components:
##                          PC1    PC2    PC3     PC4     PC5     PC6      PC7
## Standard deviation     1.739 1.5553 0.9402 0.67418 0.45123 0.12280 0.002344
## Proportion of Variance 0.432 0.3456 0.1263 0.06493 0.02909 0.00215 0.000000
## Cumulative Proportion  0.432 0.7775 0.9038 0.96876 0.99785 1.00000 1.000000
# Eigenvalues: the variance of each component (squared standard deviations)
eigenvalues <- pca_result[["sdev"]]^2
eigenvalues
## [1] 3.023767e+00 2.419002e+00 8.840140e-01 4.545223e-01 2.036101e-01
## [6] 1.507865e-02 5.496537e-06
# Standard deviation and share of variance of every component
summary(object = pca_result)
## Importance of components:
##                          PC1    PC2    PC3     PC4     PC5     PC6      PC7
## Standard deviation     1.739 1.5553 0.9402 0.67418 0.45123 0.12280 0.002344
## Proportion of Variance 0.432 0.3456 0.1263 0.06493 0.02909 0.00215 0.000000
## Cumulative Proportion  0.432 0.7775 0.9038 0.96876 0.99785 1.00000 1.000000
# Scree plot: percentage of variance explained by each principal component.
# Reuse the PCA already stored in `pca_result` instead of running prcomp() a
# second time on the same standardized matrix; `pca_res` is kept as an alias
# so any code that refers to it still works.
pca_res <- pca_result

fviz_eig(pca_res,
         addlabels = TRUE,   # print the percentage on top of each bar
         ylim = c(0, 100))   # y axis spans the full percent scale
## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.

# Loadings: the weight of each original variable in every component
loadings <- pca_result[["rotation"]]
round(loadings, digits = 3)
##                    PC1    PC2    PC3    PC4    PC5    PC6    PC7
## TP2              0.488 -0.258  0.312  0.050  0.355  0.686  0.002
## TP3              0.207  0.583  0.109  0.262  0.171 -0.083 -0.707
## H1              -0.431  0.375 -0.288  0.034 -0.272  0.718  0.001
## DV_pressure      0.298 -0.246 -0.682  0.619 -0.049 -0.014  0.000
## Reservoirs       0.207  0.583  0.108  0.263  0.171 -0.086  0.707
## Oil_temperature  0.363  0.213 -0.553 -0.681  0.231  0.004  0.000
## Motor_current    0.520  0.081  0.159 -0.108 -0.828  0.025  0.001
# Biplot of observations and variable loadings.
# Observations are drawn as points only: with roughly 1.5 million rows the
# default per-observation text labels are unreadable and very slow to render.
# The variable arrows keep their labels.
fviz_pca_biplot(pca_result, geom.ind = "point", repel = FALSE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
##   Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Component scores of every observation, as a data frame
pc_scores <- as.data.frame(pca_result[["x"]])
head(x = pc_scores)
##         PC1       PC2       PC3      PC4       PC5         PC6           PC7
## 1 -1.214119 0.6751298 0.6123896 1.212834 0.3174788 -0.03220056 -0.0006678903
## 2 -1.214132 0.6572954 0.5995672 1.199904 0.3149584 -0.03173088 -0.0006839713
## 3 -1.222633 0.6353849 0.6037723 1.199333 0.3070742 -0.03083338 -0.0006882551
## 4 -1.238130 0.6101866 0.6159287 1.209415 0.2972249 -0.03046900 -0.0006899999
## 5 -1.240535 0.5924351 0.6091596 1.195870 0.2944542 -0.02994866 -0.0007023612
## 6 -1.246279 0.5731007 0.6079115 1.180857 0.2906804 -0.02953916  0.0015015571

FA

# Parallel analysis to choose the number of factors (factor solution only)
fa.parallel(x = data_scaled, fa = "fa")

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA
# Fit the factor model with three factors and a varimax rotation
fa_res <- fa(r = data_scaled, nfactors = 3, rotate = "varimax")
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=1.06368e+07, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=1.01049e+07, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=9.85231e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=9.73232e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=8.59235e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=8.02237e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.73738e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.59489e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.59132e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.58963e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.58883e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.58119e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57737e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57547e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57542e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.5754e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57538e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57538e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57538e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=1.06368e+07, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=8.11054e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.81055e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.66806e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.60037e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.5843e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57666e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57575e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57554e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57544e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57539e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57536e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57536e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57536e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.60288e+06, f=3, theta=7.57535e+06, ..): not converged in 1000000
## iter.
# Full report of the factor solution: loadings, variance and fit indices
print(x = fa_res)
## Factor Analysis using method =  minres
## Call: fa(r = data_scaled, nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                   MR1   MR2   MR3   h2     u2 com
## TP2              0.98  0.02  0.12 0.98 0.0204 1.0
## TP3             -0.04  0.99  0.11 1.00 0.0021 1.0
## H1              -0.97  0.19 -0.10 0.99 0.0119 1.1
## DV_pressure      0.37 -0.18  0.38 0.31 0.6879 2.4
## Reservoirs      -0.04  0.99  0.11 1.00 0.0023 1.0
## Oil_temperature  0.14  0.31  0.91 0.95 0.0492 1.3
## Motor_current    0.66  0.41  0.34 0.72 0.2773 2.2
## 
##                        MR1  MR2  MR3
## SS loadings           2.51 2.30 1.14
## Proportion Var        0.36 0.33 0.16
## Cumulative Var        0.36 0.69 0.85
## Proportion Explained  0.42 0.39 0.19
## Cumulative Proportion 0.42 0.81 1.00
## 
## Mean item complexity =  1.4
## Test of the hypothesis that 3 factors are sufficient.
## 
## df null model =  21  with the objective function =  16.82 with Chi Square =  25514007
## df of  the model are 3  and the objective function was  5.01 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  0.01 
## 
## The harmonic n.obs is  1516948 with the empirical chi square  166.41  with prob <  7.6e-36 
## The total n.obs was  1516948  with Likelihood Chi Square =  7602880  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  -1.086
## RMSEA index =  1.293  and the 90 % confidence intervals are  1.29 NA
## BIC =  7602838
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    MR1 MR2  MR3
## Correlation of (regression) scores with factors   1.00   1 0.97
## Multiple R square of scores with factors          0.99   1 0.94
## Minimum correlation of possible factor scores     0.98   1 0.88
# Rotated factor loadings
fa_res[["loadings"]]
## 
## Loadings:
##                 MR1    MR2    MR3   
## TP2              0.982         0.124
## TP3                     0.992  0.105
## H1              -0.970  0.193       
## DV_pressure      0.372 -0.177  0.377
## Reservoirs              0.992  0.106
## Oil_temperature  0.136  0.314  0.913
## Motor_current    0.663  0.410  0.339
## 
##                  MR1   MR2   MR3
## SS loadings    2.506 2.304 1.138
## Proportion Var 0.358 0.329 0.163
## Cumulative Var 0.358 0.687 0.850
# Path diagram of the factor solution
fa.diagram(fa.results = fa_res)

Export Hasil

# Save the principal component scores as CSV in the working directory
write.csv(x = pc_scores, file = "hasil_pca.csv")