# import libs
library(tidyverse)
library(GGally)
library(FactoMineR)
library(factoextra)
# Read train.csv (path relative to the working directory) into a data frame
# and preview its first rows.
airplane <- read.csv(file = "train.csv")
head(airplane, n = 6L)
#>   X     id Gender     Customer.Type Age  Type.of.Travel    Class
#> 1 0  70172   Male    Loyal Customer  13 Personal Travel Eco Plus
#> 2 1   5047   Male disloyal Customer  25 Business travel Business
#> 3 2 110028 Female    Loyal Customer  26 Business travel Business
#> 4 3  24026 Female    Loyal Customer  25 Business travel Business
#> 5 4 119299   Male    Loyal Customer  61 Business travel Business
#> 6 5 111157 Female    Loyal Customer  26 Personal Travel      Eco
#>   Flight.Distance Inflight.wifi.service Departure.Arrival.time.convenient
#> 1             460                     3                                 4
#> 2             235                     3                                 2
#> 3            1142                     2                                 2
#> 4             562                     2                                 5
#> 5             214                     3                                 3
#> 6            1180                     3                                 4
#>   Ease.of.Online.booking Gate.location Food.and.drink Online.boarding
#> 1                      3             1              5               3
#> 2                      3             3              1               3
#> 3                      2             2              5               5
#> 4                      5             5              2               2
#> 5                      3             3              4               5
#> 6                      2             1              1               2
#>   Seat.comfort Inflight.entertainment On.board.service Leg.room.service
#> 1            5                      5                4                3
#> 2            1                      1                1                5
#> 3            5                      5                4                3
#> 4            2                      2                2                5
#> 5            5                      3                3                4
#> 6            1                      1                3                4
#>   Baggage.handling Checkin.service Inflight.service Cleanliness
#> 1                4               4                5           5
#> 2                3               1                4           1
#> 3                4               4                4           5
#> 4                3               1                4           2
#> 5                4               3                3           3
#> 6                4               4                4           1
#>   Departure.Delay.in.Minutes Arrival.Delay.in.Minutes            satisfaction
#> 1                         25                       18 neutral or dissatisfied
#> 2                          1                        6 neutral or dissatisfied
#> 3                          0                        0               satisfied
#> 4                         11                        9 neutral or dissatisfied
#> 5                          0                        0               satisfied
#> 6                          0                        0 neutral or dissatisfied

Gender: Gender of the passengers (Female, Male)

Customer Type: The customer type (Loyal Customer, disloyal Customer)

Age: The actual age of the passengers

Type of Travel: Purpose of the passenger's flight (Personal Travel, Business travel)

Class: Travel class in the plane of the passengers (Business, Eco, Eco Plus)

Flight distance: The flight distance of this journey

Inflight wifi service: Satisfaction level of the inflight wifi service (0:Not Applicable;1-5)

Departure/Arrival time convenient: Satisfaction level with the convenience of the departure/arrival time

Ease of Online booking: Satisfaction level of online booking

Gate location: Satisfaction level of Gate location

Food and drink: Satisfaction level of Food and drink

Online boarding: Satisfaction level of online boarding

Seat comfort: Satisfaction level of Seat comfort

Inflight entertainment: Satisfaction level of inflight entertainment

On-board service: Satisfaction level of On-board service

Leg room service: Satisfaction level of Leg room service

Baggage handling: Satisfaction level of baggage handling

Check-in service: Satisfaction level of Check-in service

Inflight service: Satisfaction level of inflight service

Cleanliness: Satisfaction level of Cleanliness

Departure Delay in Minutes: Number of minutes the departure was delayed

Arrival Delay in Minutes: Number of minutes the arrival was delayed

Satisfaction: Airline satisfaction level (satisfied, neutral or dissatisfied)

# Print a compact column-by-column overview (name, type, first values).
dplyr::glimpse(airplane)
#> Rows: 103,904
#> Columns: 25
#> $ X                                 <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1…
#> $ id                                <int> 70172, 5047, 110028, 24026, 119299,…
#> $ Gender                            <chr> "Male", "Male", "Female", "Female",…
#> $ Customer.Type                     <chr> "Loyal Customer", "disloyal Custome…
#> $ Age                               <int> 13, 25, 26, 25, 61, 26, 47, 52, 41,…
#> $ Type.of.Travel                    <chr> "Personal Travel", "Business travel…
#> $ Class                             <chr> "Eco Plus", "Business", "Business",…
#> $ Flight.Distance                   <int> 460, 235, 1142, 562, 214, 1180, 127…
#> $ Inflight.wifi.service             <int> 3, 3, 2, 2, 3, 3, 2, 4, 1, 3, 4, 2,…
#> $ Departure.Arrival.time.convenient <int> 4, 2, 2, 5, 3, 4, 4, 3, 2, 3, 5, 4,…
#> $ Ease.of.Online.booking            <int> 3, 3, 2, 5, 3, 2, 2, 4, 2, 3, 5, 2,…
#> $ Gate.location                     <int> 1, 3, 2, 5, 3, 1, 3, 4, 2, 4, 4, 2,…
#> $ Food.and.drink                    <int> 5, 1, 5, 2, 4, 1, 2, 5, 4, 2, 2, 1,…
#> $ Online.boarding                   <int> 3, 3, 5, 2, 5, 2, 2, 5, 3, 3, 5, 2,…
#> $ Seat.comfort                      <int> 5, 1, 5, 2, 5, 1, 2, 5, 3, 3, 2, 1,…
#> $ Inflight.entertainment            <int> 5, 1, 5, 2, 3, 1, 2, 5, 1, 2, 2, 1,…
#> $ On.board.service                  <int> 4, 1, 4, 2, 3, 3, 3, 5, 1, 2, 3, 1,…
#> $ Leg.room.service                  <int> 3, 5, 3, 5, 4, 4, 3, 5, 2, 3, 3, 2,…
#> $ Baggage.handling                  <int> 4, 3, 4, 3, 4, 4, 4, 5, 1, 4, 5, 5,…
#> $ Checkin.service                   <int> 4, 1, 4, 1, 3, 4, 3, 4, 4, 4, 3, 5,…
#> $ Inflight.service                  <int> 5, 4, 4, 4, 3, 4, 5, 5, 1, 3, 5, 5,…
#> $ Cleanliness                       <int> 5, 1, 5, 2, 3, 1, 2, 4, 2, 2, 2, 1,…
#> $ Departure.Delay.in.Minutes        <int> 25, 1, 0, 11, 0, 0, 9, 4, 0, 0, 0, …
#> $ Arrival.Delay.in.Minutes          <dbl> 18, 6, 0, 9, 0, 0, 23, 0, 0, 0, 0, …
#> $ satisfaction                      <chr> "neutral or dissatisfied", "neutral…

Membuang kolom yang tidak diperlukan dalam proses pengerjaan Unsupervised Learning:

- X
- id

# Drop the row-index column `X` and the `id` column, which the analysis
# does not use. The columns are removed by name rather than by position so
# that the step does not depend on the column order of the CSV, and so that
# re-running it raises an error instead of silently dropping two more columns.
airplane <- airplane %>%
  select(-c(X, id))
glimpse(airplane)
#> Rows: 103,904
#> Columns: 23
#> $ Gender                            <chr> "Male", "Male", "Female", "Female",…
#> $ Customer.Type                     <chr> "Loyal Customer", "disloyal Custome…
#> $ Age                               <int> 13, 25, 26, 25, 61, 26, 47, 52, 41,…
#> $ Type.of.Travel                    <chr> "Personal Travel", "Business travel…
#> $ Class                             <chr> "Eco Plus", "Business", "Business",…
#> $ Flight.Distance                   <int> 460, 235, 1142, 562, 214, 1180, 127…
#> $ Inflight.wifi.service             <int> 3, 3, 2, 2, 3, 3, 2, 4, 1, 3, 4, 2,…
#> $ Departure.Arrival.time.convenient <int> 4, 2, 2, 5, 3, 4, 4, 3, 2, 3, 5, 4,…
#> $ Ease.of.Online.booking            <int> 3, 3, 2, 5, 3, 2, 2, 4, 2, 3, 5, 2,…
#> $ Gate.location                     <int> 1, 3, 2, 5, 3, 1, 3, 4, 2, 4, 4, 2,…
#> $ Food.and.drink                    <int> 5, 1, 5, 2, 4, 1, 2, 5, 4, 2, 2, 1,…
#> $ Online.boarding                   <int> 3, 3, 5, 2, 5, 2, 2, 5, 3, 3, 5, 2,…
#> $ Seat.comfort                      <int> 5, 1, 5, 2, 5, 1, 2, 5, 3, 3, 2, 1,…
#> $ Inflight.entertainment            <int> 5, 1, 5, 2, 3, 1, 2, 5, 1, 2, 2, 1,…
#> $ On.board.service                  <int> 4, 1, 4, 2, 3, 3, 3, 5, 1, 2, 3, 1,…
#> $ Leg.room.service                  <int> 3, 5, 3, 5, 4, 4, 3, 5, 2, 3, 3, 2,…
#> $ Baggage.handling                  <int> 4, 3, 4, 3, 4, 4, 4, 5, 1, 4, 5, 5,…
#> $ Checkin.service                   <int> 4, 1, 4, 1, 3, 4, 3, 4, 4, 4, 3, 5,…
#> $ Inflight.service                  <int> 5, 4, 4, 4, 3, 4, 5, 5, 1, 3, 5, 5,…
#> $ Cleanliness                       <int> 5, 1, 5, 2, 3, 1, 2, 4, 2, 2, 2, 1,…
#> $ Departure.Delay.in.Minutes        <int> 25, 1, 0, 11, 0, 0, 9, 4, 0, 0, 0, …
#> $ Arrival.Delay.in.Minutes          <dbl> 18, 6, 0, 9, 0, 0, 23, 0, 0, 0, 0, …
#> $ satisfaction                      <chr> "neutral or dissatisfied", "neutral…
# Count the missing values in each column.
airplane %>%
  is.na() %>%
  colSums()
#>                            Gender                     Customer.Type 
#>                                 0                                 0 
#>                               Age                    Type.of.Travel 
#>                                 0                                 0 
#>                             Class                   Flight.Distance 
#>                                 0                                 0 
#>             Inflight.wifi.service Departure.Arrival.time.convenient 
#>                                 0                                 0 
#>            Ease.of.Online.booking                     Gate.location 
#>                                 0                                 0 
#>                    Food.and.drink                   Online.boarding 
#>                                 0                                 0 
#>                      Seat.comfort            Inflight.entertainment 
#>                                 0                                 0 
#>                  On.board.service                  Leg.room.service 
#>                                 0                                 0 
#>                  Baggage.handling                   Checkin.service 
#>                                 0                                 0 
#>                  Inflight.service                       Cleanliness 
#>                                 0                                 0 
#>        Departure.Delay.in.Minutes          Arrival.Delay.in.Minutes 
#>                                 0                               310 
#>                      satisfaction 
#>                                 0
# Remove every row that contains at least one NA, then recount the missing
# values per column to confirm that none remain.
airplane <- airplane %>%
  drop_na()
airplane %>%
  is.na() %>%
  colSums()
#>                            Gender                     Customer.Type 
#>                                 0                                 0 
#>                               Age                    Type.of.Travel 
#>                                 0                                 0 
#>                             Class                   Flight.Distance 
#>                                 0                                 0 
#>             Inflight.wifi.service Departure.Arrival.time.convenient 
#>                                 0                                 0 
#>            Ease.of.Online.booking                     Gate.location 
#>                                 0                                 0 
#>                    Food.and.drink                   Online.boarding 
#>                                 0                                 0 
#>                      Seat.comfort            Inflight.entertainment 
#>                                 0                                 0 
#>                  On.board.service                  Leg.room.service 
#>                                 0                                 0 
#>                  Baggage.handling                   Checkin.service 
#>                                 0                                 0 
#>                  Inflight.service                       Cleanliness 
#>                                 0                                 0 
#>        Departure.Delay.in.Minutes          Arrival.Delay.in.Minutes 
#>                                 0                                 0 
#>                      satisfaction 
#>                                 0

Cek Nilai Variansi

# Variance-covariance matrix of the numeric columns; the diagonal holds each
# variable's variance. `select(where(is.numeric))` replaces the superseded
# `select_if(is.numeric)` and selects the same columns.
airplane %>%
  select(where(is.numeric)) %>%
  var()
#>                                             Age Flight.Distance
#> Age                                228.40655557     1504.779961
#> Flight.Distance                   1504.77996080   994601.774592
#> Inflight.wifi.service                0.35058863        9.336387
#> Departure.Arrival.time.convenient    0.87680615      -30.282684
#> Ease.of.Online.booking               0.51717040       91.657110
#> Gate.location                       -0.03008399        6.030465
#> Food.and.drink                       0.46049850       75.513602
#> Online.boarding                      4.25587545      289.600340
#> Seat.comfort                         3.19523624      207.186796
#> Inflight.entertainment               1.53876555      171.023591
#> On.board.service                     1.11218758      140.737311
#> Leg.room.service                     0.80509199      175.577729
#> Baggage.handling                    -0.84997363       74.467137
#> Checkin.service                      0.66940264       92.406337
#> Inflight.service                    -0.88656527       67.332932
#> Cleanliness                          1.06084342      121.862023
#> Departure.Delay.in.Minutes          -5.84723167       72.444795
#> Arrival.Delay.in.Minutes            -7.10419069      -93.621977
#>                                   Inflight.wifi.service
#> Age                                           0.3505886
#> Flight.Distance                               9.3363872
#> Inflight.wifi.service                         1.7632288
#> Departure.Arrival.time.convenient             0.6962152
#> Ease.of.Online.booking                        1.3297571
#> Gate.location                                 0.5702877
#> Food.and.drink                                0.2376098
#> Online.boarding                               0.8188866
#> Seat.comfort                                  0.2147409
#> Inflight.entertainment                        0.3708563
#> On.board.service                              0.2078183
#> Leg.room.service                              0.2803164
#> Baggage.handling                              0.1898556
#> Checkin.service                               0.0725514
#> Inflight.service                              0.1726926
#> Cleanliness                                   0.2311350
#> Departure.Delay.in.Minutes                   -0.8832421
#> Arrival.Delay.in.Minutes                     -0.9812316
#>                                   Departure.Arrival.time.convenient
#> Age                                                     0.876806148
#> Flight.Distance                                       -30.282683759
#> Inflight.wifi.service                                   0.696215181
#> Departure.Arrival.time.convenient                       2.326335348
#> Ease.of.Online.booking                                  0.932471611
#> Gate.location                                           0.866448429
#> Food.and.drink                                          0.010521659
#> Online.boarding                                         0.144053226
#> Seat.comfort                                            0.022963941
#> Inflight.entertainment                                 -0.009521065
#> On.board.service                                        0.134802944
#> Leg.room.service                                        0.025000553
#> Baggage.handling                                        0.129520133
#> Checkin.service                                         0.180126996
#> Inflight.service                                        0.131301374
#> Cleanliness                                             0.028693693
#> Departure.Delay.in.Minutes                              0.045966689
#> Arrival.Delay.in.Minutes                               -0.051008212
#>                                   Ease.of.Online.booking Gate.location
#> Age                                           0.51717040  -0.030083990
#> Flight.Distance                              91.65710971   6.030465199
#> Inflight.wifi.service                         1.32975707   0.570287678
#> Departure.Arrival.time.convenient             0.93247161   0.866448429
#> Ease.of.Online.booking                        1.95701517   0.819984187
#> Gate.location                                 0.81998419   1.632574799
#> Food.and.drink                                0.05940085  -0.001987982
#> Online.boarding                               0.76283403   0.002501937
#> Seat.comfort                                  0.05539062   0.005701547
#> Inflight.entertainment                        0.08799089   0.006070411
#> On.board.service                              0.06985204  -0.046966402
#> Leg.room.service                              0.19769204  -0.009862311
#> Baggage.handling                              0.06419087   0.003652906
#> Checkin.service                               0.01939632  -0.057317696
#> Inflight.service                              0.05810392   0.002616116
#> Cleanliness                                   0.02972249  -0.006731878
#> Departure.Delay.in.Minutes                   -0.33550176   0.269459278
#> Arrival.Delay.in.Minutes                     -0.43220849   0.254295699
#>                                   Food.and.drink Online.boarding  Seat.comfort
#> Age                                  0.460498503     4.255875455   3.195236243
#> Flight.Distance                     75.513602178   289.600339811 207.186796038
#> Inflight.wifi.service                0.237609766     0.818886582   0.214740926
#> Departure.Arrival.time.convenient    0.010521659     0.144053226   0.022963941
#> Ease.of.Online.booking               0.059400849     0.762834027   0.055390616
#> Gate.location                       -0.001987982     0.002501937   0.005701547
#> Food.and.drink                       1.767307559     0.420663950   1.007401695
#> Online.boarding                      0.420663950     1.820968592   0.747619068
#> Seat.comfort                         1.007401695     0.747619068   1.739486574
#> Inflight.entertainment               1.102928704     0.513017727   1.073538262
#> On.board.service                     0.101044950     0.270059711   0.224334466
#> Leg.room.service                     0.056683575     0.219717093   0.182939389
#> Baggage.handling                     0.054657192     0.132757214   0.116129738
#> Checkin.service                      0.146446074     0.348699501   0.319673852
#> Inflight.service                     0.053257502     0.118012325   0.107283783
#> Cleanliness                          1.147221679     0.586989474   1.174206912
#> Departure.Delay.in.Minutes          -1.519302426    -0.952342522  -1.373566363
#> Arrival.Delay.in.Minutes            -1.673243800    -1.146184606  -1.526088128
#>                                   Inflight.entertainment On.board.service
#> Age                                          1.538765547       1.11218758
#> Flight.Distance                            171.023590810     140.73731125
#> Inflight.wifi.service                        0.370856339       0.20781831
#> Departure.Arrival.time.convenient           -0.009521065       0.13480294
#> Ease.of.Online.booking                       0.087990889       0.06985204
#> Gate.location                                0.006070411      -0.04696640
#> Food.and.drink                               1.102928704       0.10104495
#> Online.boarding                              0.513017727       0.27005971
#> Seat.comfort                                 1.073538262       0.22433447
#> Inflight.entertainment                       1.776970040       0.72188070
#> On.board.service                             0.721880702       1.65967689
#> Leg.room.service                             0.525781710       0.60270369
#> Baggage.handling                             0.595682229       0.79005770
#> Checkin.service                              0.203787271       0.39752558
#> Inflight.service                             0.635068667       0.83407948
#> Cleanliness                                  1.209979192       0.20832848
#> Departure.Delay.in.Minutes                  -1.406996555      -1.54555147
#> Arrival.Delay.in.Minutes                    -1.583851186      -1.75622960
#>                                   Leg.room.service Baggage.handling
#> Age                                    0.805091989     -0.849973626
#> Flight.Distance                      175.577728835     74.467136690
#> Inflight.wifi.service                  0.280316362      0.189855626
#> Departure.Arrival.time.convenient      0.025000553      0.129520133
#> Ease.of.Online.booking                 0.197692041      0.064190866
#> Gate.location                         -0.009862311      0.003652906
#> Food.and.drink                         0.056683575      0.054657192
#> Online.boarding                        0.219717093      0.132757214
#> Seat.comfort                           0.182939389      0.116129738
#> Inflight.entertainment                 0.525781710      0.595682229
#> On.board.service                       0.602703691      0.790057703
#> Leg.room.service                       1.730299941      0.574311896
#> Baggage.handling                       0.574311896      1.394882123
#> Checkin.service                        0.254802378      0.348705323
#> Inflight.service                       0.570505493      0.873256280
#> Cleanliness                            0.166394388      0.148441242
#> Departure.Delay.in.Minutes             0.718810866     -0.255841464
#> Arrival.Delay.in.Minutes               0.602874458     -0.390398284
#>                                   Checkin.service Inflight.service
#> Age                                    0.66940264     -0.886565269
#> Flight.Distance                       92.40633705     67.332931772
#> Inflight.wifi.service                  0.07255140      0.172692617
#> Departure.Arrival.time.convenient      0.18012700      0.131301374
#> Ease.of.Online.booking                 0.01939632      0.058103920
#> Gate.location                         -0.05731770      0.002616116
#> Food.and.drink                         0.14644607      0.053257502
#> Online.boarding                        0.34869950      0.118012325
#> Seat.comfort                           0.31967385      0.107283783
#> Inflight.entertainment                 0.20378727      0.635068667
#> On.board.service                       0.39752558      0.834079475
#> Leg.room.service                       0.25480238      0.570505493
#> Baggage.handling                       0.34870532      0.873256280
#> Checkin.service                        1.60122715      0.352942450
#> Inflight.service                       0.35294245      1.382042439
#> Cleanliness                            0.29793569      0.137125679
#> Departure.Delay.in.Minutes            -0.87133809     -2.439985786
#> Arrival.Delay.in.Minutes              -0.99747099     -2.693072766
#>                                     Cleanliness Departure.Delay.in.Minutes
#> Age                                 1.060843423                -5.84723167
#> Flight.Distance                   121.862023254                72.44479531
#> Inflight.wifi.service               0.231134995                -0.88324209
#> Departure.Arrival.time.convenient   0.028693693                 0.04596669
#> Ease.of.Online.booking              0.029722489                -0.33550176
#> Gate.location                      -0.006731878                 0.26945928
#> Food.and.drink                      1.147221679                -1.51930243
#> Online.boarding                     0.586989474                -0.95234252
#> Seat.comfort                        1.174206912                -1.37356636
#> Inflight.entertainment              1.209979192                -1.40699656
#> On.board.service                    0.208328484                -1.54555147
#> Leg.room.service                    0.166394388                 0.71881087
#> Baggage.handling                    0.148441242                -0.25584146
#> Checkin.service                     0.297935686                -0.87133809
#> Inflight.service                    0.137125679                -2.43998579
#> Cleanliness                         1.721852736                -0.69197344
#> Departure.Delay.in.Minutes         -0.691973437              1452.88564078
#> Arrival.Delay.in.Minutes           -0.800992408              1424.14948547
#>                                   Arrival.Delay.in.Minutes
#> Age                                            -7.10419069
#> Flight.Distance                               -93.62197729
#> Inflight.wifi.service                          -0.98123157
#> Departure.Arrival.time.convenient              -0.05100821
#> Ease.of.Online.booking                         -0.43220849
#> Gate.location                                   0.25429570
#> Food.and.drink                                 -1.67324380
#> Online.boarding                                -1.14618461
#> Seat.comfort                                   -1.52608813
#> Inflight.entertainment                         -1.58385119
#> On.board.service                               -1.75622960
#> Leg.room.service                                0.60287446
#> Baggage.handling                               -0.39039828
#> Checkin.service                                -0.99747099
#> Inflight.service                               -2.69307277
#> Cleanliness                                    -0.80099241
#> Departure.Delay.in.Minutes                   1424.14948547
#> Arrival.Delay.in.Minutes                     1497.58799016

PCA

Principal Component Analysis (PCA) merupakan salah satu metode untuk mereduksi dimensi dengan cara merangkum informasi (variance) dari variabel-variabel yang ada menjadi dimensi-dimensi baru yang disebut principal component (PC).

# Preview the first six rows of the cleaned data.
head(airplane, n = 6L)
#>   Gender     Customer.Type Age  Type.of.Travel    Class Flight.Distance
#> 1   Male    Loyal Customer  13 Personal Travel Eco Plus             460
#> 2   Male disloyal Customer  25 Business travel Business             235
#> 3 Female    Loyal Customer  26 Business travel Business            1142
#> 4 Female    Loyal Customer  25 Business travel Business             562
#> 5   Male    Loyal Customer  61 Business travel Business             214
#> 6 Female    Loyal Customer  26 Personal Travel      Eco            1180
#>   Inflight.wifi.service Departure.Arrival.time.convenient
#> 1                     3                                 4
#> 2                     3                                 2
#> 3                     2                                 2
#> 4                     2                                 5
#> 5                     3                                 3
#> 6                     3                                 4
#>   Ease.of.Online.booking Gate.location Food.and.drink Online.boarding
#> 1                      3             1              5               3
#> 2                      3             3              1               3
#> 3                      2             2              5               5
#> 4                      5             5              2               2
#> 5                      3             3              4               5
#> 6                      2             1              1               2
#>   Seat.comfort Inflight.entertainment On.board.service Leg.room.service
#> 1            5                      5                4                3
#> 2            1                      1                1                5
#> 3            5                      5                4                3
#> 4            2                      2                2                5
#> 5            5                      3                3                4
#> 6            1                      1                3                4
#>   Baggage.handling Checkin.service Inflight.service Cleanliness
#> 1                4               4                5           5
#> 2                3               1                4           1
#> 3                4               4                4           5
#> 4                3               1                4           2
#> 5                4               3                3           3
#> 6                4               4                4           1
#>   Departure.Delay.in.Minutes Arrival.Delay.in.Minutes            satisfaction
#> 1                         25                       18 neutral or dissatisfied
#> 2                          1                        6 neutral or dissatisfied
#> 3                          0                        0               satisfied
#> 4                         11                        9 neutral or dissatisfied
#> 5                          0                        0               satisfied
#> 6                          0                        0 neutral or dissatisfied
# Build the PCA input by dropping the character columns and keeping the
# numeric ones. The columns are dropped by name rather than by position so
# the result does not depend on the column order or on which columns were
# removed earlier; a missing column raises an error instead of silently
# dropping the wrong one.
df <- airplane %>%
  select(-c(Gender, Customer.Type, Type.of.Travel, Class, satisfaction))
head(df, 3)
#>   Age Flight.Distance Inflight.wifi.service Departure.Arrival.time.convenient
#> 1  13             460                     3                                 4
#> 2  25             235                     3                                 2
#> 3  26            1142                     2                                 2
#>   Ease.of.Online.booking Gate.location Food.and.drink Online.boarding
#> 1                      3             1              5               3
#> 2                      3             3              1               3
#> 3                      2             2              5               5
#>   Seat.comfort Inflight.entertainment On.board.service Leg.room.service
#> 1            5                      5                4                3
#> 2            1                      1                1                5
#> 3            5                      5                4                3
#>   Baggage.handling Checkin.service Inflight.service Cleanliness
#> 1                4               4                5           5
#> 2                3               1                4           1
#> 3                4               4                4           5
#>   Departure.Delay.in.Minutes Arrival.Delay.in.Minutes
#> 1                         25                       18
#> 2                          1                        6
#> 3                          0                        0
# Print a compact column-by-column overview of the PCA input.
dplyr::glimpse(df)
#> Rows: 103,594
#> Columns: 18
#> $ Age                               <int> 13, 25, 26, 25, 61, 26, 47, 52, 41,…
#> $ Flight.Distance                   <int> 460, 235, 1142, 562, 214, 1180, 127…
#> $ Inflight.wifi.service             <int> 3, 3, 2, 2, 3, 3, 2, 4, 1, 3, 4, 2,…
#> $ Departure.Arrival.time.convenient <int> 4, 2, 2, 5, 3, 4, 4, 3, 2, 3, 5, 4,…
#> $ Ease.of.Online.booking            <int> 3, 3, 2, 5, 3, 2, 2, 4, 2, 3, 5, 2,…
#> $ Gate.location                     <int> 1, 3, 2, 5, 3, 1, 3, 4, 2, 4, 4, 2,…
#> $ Food.and.drink                    <int> 5, 1, 5, 2, 4, 1, 2, 5, 4, 2, 2, 1,…
#> $ Online.boarding                   <int> 3, 3, 5, 2, 5, 2, 2, 5, 3, 3, 5, 2,…
#> $ Seat.comfort                      <int> 5, 1, 5, 2, 5, 1, 2, 5, 3, 3, 2, 1,…
#> $ Inflight.entertainment            <int> 5, 1, 5, 2, 3, 1, 2, 5, 1, 2, 2, 1,…
#> $ On.board.service                  <int> 4, 1, 4, 2, 3, 3, 3, 5, 1, 2, 3, 1,…
#> $ Leg.room.service                  <int> 3, 5, 3, 5, 4, 4, 3, 5, 2, 3, 3, 2,…
#> $ Baggage.handling                  <int> 4, 3, 4, 3, 4, 4, 4, 5, 1, 4, 5, 5,…
#> $ Checkin.service                   <int> 4, 1, 4, 1, 3, 4, 3, 4, 4, 4, 3, 5,…
#> $ Inflight.service                  <int> 5, 4, 4, 4, 3, 4, 5, 5, 1, 3, 5, 5,…
#> $ Cleanliness                       <int> 5, 1, 5, 2, 3, 1, 2, 4, 2, 2, 2, 1,…
#> $ Departure.Delay.in.Minutes        <int> 25, 1, 0, 11, 0, 0, 9, 4, 0, 0, 0, …
#> $ Arrival.Delay.in.Minutes          <dbl> 18, 6, 0, 9, 0, 0, 23, 0, 0, 0, 0, …
# Number of rows (observations) and columns (variables) in df.
dim(df)
#> [1] 103594     18
  • output: PC dengan
# Run PCA on the standardised data (every column centred and scaled to unit
# variance), then preview the PC scores of the first three rows.
df_pc <- prcomp(scale(df))
head(df_pc$x, n = 3)
#>            PC1       PC2        PC3        PC4        PC5        PC6       PC7
#> [1,]  2.106698 -1.236612 -0.2322703 -0.3423958 1.91270733 -0.2153185 1.4493111
#> [2,] -3.322792  1.241931  1.2603804  0.5424145 0.08913361 -2.1738239 0.1840568
#> [3,]  2.128788 -1.926245 -0.9123639  0.4258381 0.39599178 -0.2915697 1.0553938
#>             PC8        PC9          PC10       PC11       PC12        PC13
#> [1,] -0.2875312 -0.4884377 -1.3390422677  0.1302104 -0.1410566  0.60010621
#> [2,]  0.5860853  1.2514371  0.2195892726  1.0153447  0.6478209  0.81200696
#> [3,] -0.3578300 -0.5148965  0.0008271163 -0.1700469  0.4501204 -0.01314923
#>            PC14       PC15       PC16        PC17         PC18
#> [1,] -0.3891295  0.4518179 -0.4413054  0.05565799 -0.136282705
#> [2,]  0.4108619 -0.2428415  0.2189309  0.17851657  0.081910451
#> [3,]  0.5884105 -0.2076590 -0.1160029 -0.19549608 -0.001516704
# Size of the score matrix: one row per observation, one column per PC.
dim(df_pc$x)
#> [1] 103594     18
# Step 1: compute the variance-covariance matrix of the standardised data,
# which holds the information contained in the original data.
var_cov <- var(scale(df))

var_cov
#>                                            Age Flight.Distance
#> Age                                1.000000000     0.099837593
#> Flight.Distance                    0.099837593     1.000000000
#> Inflight.wifi.service              0.017469851     0.007050175
#> Departure.Arrival.time.convenient  0.038037603    -0.019908273
#> Ease.of.Online.booking             0.024461455     0.065696835
#> Gate.location                     -0.001557918     0.004732489
#> Food.and.drink                     0.022920172     0.056956659
#> Online.boarding                    0.208681236     0.215190564
#> Seat.comfort                       0.160301708     0.157516814
#> Inflight.entertainment             0.076379730     0.128644538
#> On.board.service                   0.057123128     0.109540031
#> Leg.room.service                   0.040497716     0.133839441
#> Baggage.handling                  -0.047619235     0.063222445
#> Checkin.service                    0.035003112     0.073223531
#> Inflight.service                  -0.049899448     0.057430450
#> Cleanliness                        0.053493252     0.093120600
#> Departure.Delay.in.Minutes        -0.010150333     0.001905754
#> Arrival.Delay.in.Minutes          -0.012146864    -0.002425811
#>                                   Inflight.wifi.service
#> Age                                         0.017469851
#> Flight.Distance                             0.007050175
#> Inflight.wifi.service                       1.000000000
#> Departure.Arrival.time.convenient           0.343758147
#> Ease.of.Online.booking                      0.715848109
#> Gate.location                               0.336126779
#> Food.and.drink                              0.134602736
#> Online.boarding                             0.457002102
#> Seat.comfort                                0.122616793
#> Inflight.entertainment                      0.209513168
#> On.board.service                            0.121483616
#> Leg.room.service                            0.160484621
#> Baggage.handling                            0.121059921
#> Checkin.service                             0.043178243
#> Inflight.service                            0.110626393
#> Cleanliness                                 0.132651868
#> Departure.Delay.in.Minutes                 -0.017450574
#> Arrival.Delay.in.Minutes                   -0.019095059
#>                                   Departure.Arrival.time.convenient
#> Age                                                    0.0380376033
#> Flight.Distance                                       -0.0199082730
#> Inflight.wifi.service                                  0.3437581468
#> Departure.Arrival.time.convenient                      1.0000000000
#> Ease.of.Online.booking                                 0.4370210571
#> Gate.location                                          0.4446005692
#> Food.and.drink                                         0.0051890995
#> Online.boarding                                        0.0699899368
#> Seat.comfort                                           0.0114156259
#> Inflight.entertainment                                -0.0046828402
#> On.board.service                                       0.0686043168
#> Leg.room.service                                       0.0124609961
#> Baggage.handling                                       0.0719005755
#> Checkin.service                                        0.0933289055
#> Inflight.service                                       0.0732271988
#> Cleanliness                                            0.0143368018
#> Departure.Delay.in.Minutes                             0.0007906629
#> Arrival.Delay.in.Minutes                              -0.0008641871
#>                                   Ease.of.Online.booking Gate.location
#> Age                                          0.024461455  -0.001557918
#> Flight.Distance                              0.065696835   0.004732489
#> Inflight.wifi.service                        0.715848109   0.336126779
#> Departure.Arrival.time.convenient            0.437021057   0.444600569
#> Ease.of.Online.booking                       1.000000000   0.458745522
#> Gate.location                                0.458745522   1.000000000
#> Food.and.drink                               0.031940337  -0.001170361
#> Online.boarding                              0.404093378   0.001451071
#> Seat.comfort                                 0.030021236   0.003383340
#> Inflight.entertainment                       0.047184630   0.003564031
#> On.board.service                             0.038758791  -0.028532444
#> Leg.room.service                             0.107431439  -0.005867883
#> Baggage.handling                             0.038851463   0.002420656
#> Checkin.service                              0.010957101  -0.035450773
#> Inflight.service                             0.035330326   0.001741645
#> Cleanliness                                  0.016191612  -0.004015149
#> Departure.Delay.in.Minutes                  -0.006291903   0.005532748
#> Arrival.Delay.in.Minutes                    -0.007983623   0.005142879
#>                                   Food.and.drink Online.boarding Seat.comfort
#> Age                                  0.022920172     0.208681236   0.16030171
#> Flight.Distance                      0.056956659     0.215190564   0.15751681
#> Inflight.wifi.service                0.134602736     0.457002102   0.12261679
#> Departure.Arrival.time.convenient    0.005189100     0.069989937   0.01141563
#> Ease.of.Online.booking               0.031940337     0.404093378   0.03002124
#> Gate.location                       -0.001170361     0.001451071   0.00338334
#> Food.and.drink                       1.000000000     0.234491983   0.57456090
#> Online.boarding                      0.234491983     1.000000000   0.42006703
#> Seat.comfort                         0.574560901     0.420067035   1.00000000
#> Inflight.entertainment               0.622373825     0.285194465   0.61061425
#> On.board.service                     0.058999288     0.155344827   0.13203032
#> Leg.room.service                     0.032414585     0.123780411   0.10544743
#> Baggage.handling                     0.034811481     0.083298686   0.07455283
#> Checkin.service                      0.087055295     0.204208411   0.19154468
#> Inflight.service                     0.034077210     0.074390153   0.06919311
#> Cleanliness                          0.657647654     0.331498130   0.67847850
#> Departure.Delay.in.Minutes          -0.029982818    -0.018515107  -0.02732269
#> Arrival.Delay.in.Minutes            -0.032524227    -0.021948617  -0.02990012
#>                                   Inflight.entertainment On.board.service
#> Age                                          0.076379730       0.05712313
#> Flight.Distance                              0.128644538       0.10954003
#> Inflight.wifi.service                        0.209513168       0.12148362
#> Departure.Arrival.time.convenient           -0.004682840       0.06860432
#> Ease.of.Online.booking                       0.047184630       0.03875879
#> Gate.location                                0.003564031      -0.02853244
#> Food.and.drink                               0.622373825       0.05899929
#> Online.boarding                              0.285194465       0.15534483
#> Seat.comfort                                 0.610614249       0.13203032
#> Inflight.entertainment                       1.000000000       0.42035247
#> On.board.service                             0.420352471       1.00000000
#> Leg.room.service                             0.299850464       0.35565702
#> Baggage.handling                             0.378360540       0.51925213
#> Checkin.service                              0.120812126       0.24385229
#> Inflight.service                             0.405247126       0.55072523
#> Cleanliness                                  0.691735153       0.12323636
#> Departure.Delay.in.Minutes                  -0.027690912      -0.03147429
#> Arrival.Delay.in.Minutes                    -0.030702812      -0.03522681
#>                                   Leg.room.service Baggage.handling
#> Age                                    0.040497716     -0.047619235
#> Flight.Distance                        0.133839441      0.063222445
#> Inflight.wifi.service                  0.160484621      0.121059921
#> Departure.Arrival.time.convenient      0.012460996      0.071900575
#> Ease.of.Online.booking                 0.107431439      0.038851463
#> Gate.location                         -0.005867883      0.002420656
#> Food.and.drink                         0.032414585      0.034811481
#> Online.boarding                        0.123780411      0.083298686
#> Seat.comfort                           0.105447427      0.074552833
#> Inflight.entertainment                 0.299850464      0.378360540
#> On.board.service                       0.355657016      0.519252126
#> Leg.room.service                       1.000000000      0.369673536
#> Baggage.handling                       0.369673536      1.000000000
#> Checkin.service                        0.153079244      0.233326115
#> Inflight.service                       0.368925305      0.628944428
#> Cleanliness                            0.096400653      0.095782865
#> Departure.Delay.in.Minutes             0.014336338     -0.005683115
#> Arrival.Delay.in.Minutes               0.011843226     -0.008541674
#>                                   Checkin.service Inflight.service  Cleanliness
#> Age                                    0.03500311     -0.049899448  0.053493252
#> Flight.Distance                        0.07322353      0.057430450  0.093120600
#> Inflight.wifi.service                  0.04317824      0.110626393  0.132651868
#> Departure.Arrival.time.convenient      0.09332891      0.073227199  0.014336802
#> Ease.of.Online.booking                 0.01095710      0.035330326  0.016191612
#> Gate.location                         -0.03545077      0.001741645 -0.004015149
#> Food.and.drink                         0.08705530      0.034077210  0.657647654
#> Online.boarding                        0.20420841      0.074390153  0.331498130
#> Seat.comfort                           0.19154468      0.069193110  0.678478499
#> Inflight.entertainment                 0.12081213      0.405247126  0.691735153
#> On.board.service                       0.24385229      0.550725227  0.123236364
#> Leg.room.service                       0.15307924      0.368925305  0.096400653
#> Baggage.handling                       0.23332611      0.628944428  0.095782865
#> Checkin.service                        1.00000000      0.237255743  0.179431236
#> Inflight.service                       0.23725574      1.000000000  0.088891472
#> Cleanliness                            0.17943124      0.088891472  1.000000000
#> Departure.Delay.in.Minutes            -0.01806527     -0.054451631 -0.013834890
#> Arrival.Delay.in.Minutes              -0.02036937     -0.059195845 -0.015773723
#>                                   Departure.Delay.in.Minutes
#> Age                                            -0.0101503333
#> Flight.Distance                                 0.0019057541
#> Inflight.wifi.service                          -0.0174505738
#> Departure.Arrival.time.convenient               0.0007906629
#> Ease.of.Online.booking                         -0.0062919026
#> Gate.location                                   0.0055327479
#> Food.and.drink                                 -0.0299828180
#> Online.boarding                                -0.0185151073
#> Seat.comfort                                   -0.0273226860
#> Inflight.entertainment                         -0.0276909124
#> On.board.service                               -0.0314742929
#> Leg.room.service                                0.0143363377
#> Baggage.handling                               -0.0056831152
#> Checkin.service                                -0.0180652723
#> Inflight.service                               -0.0544516308
#> Cleanliness                                    -0.0138348898
#> Departure.Delay.in.Minutes                      1.0000000000
#> Arrival.Delay.in.Minutes                        0.9654809014
#>                                   Arrival.Delay.in.Minutes
#> Age                                          -0.0121468636
#> Flight.Distance                              -0.0024258113
#> Inflight.wifi.service                        -0.0190950592
#> Departure.Arrival.time.convenient            -0.0008641871
#> Ease.of.Online.booking                       -0.0079836230
#> Gate.location                                 0.0051428789
#> Food.and.drink                               -0.0325242269
#> Online.boarding                              -0.0219486175
#> Seat.comfort                                 -0.0299001178
#> Inflight.entertainment                       -0.0307028116
#> On.board.service                             -0.0352268123
#> Leg.room.service                              0.0118432259
#> Baggage.handling                             -0.0085416736
#> Checkin.service                              -0.0203693714
#> Inflight.service                             -0.0591958450
#> Cleanliness                                  -0.0157737235
#> Departure.Delay.in.Minutes                    0.9654809014
#> Arrival.Delay.in.Minutes                      1.0000000000
# Step 2: decompose the variance-covariance matrix into
#   - eigenvalues (the amount of information carried by each PC)
#   - eigenvectors (the magnitude of the change in direction of the linear axis)
eig <- eigen(var_cov)
eig
#> eigen() decomposition
#> $values
#>  [1] 3.86067850 2.36324783 2.17933427 1.96253451 1.24514248 0.96288004
#>  [7] 0.92834159 0.89139099 0.69149756 0.53392241 0.47895699 0.44686863
#> [13] 0.36696178 0.32321721 0.29467345 0.25070741 0.18515330 0.03449105
#> 
#> $vectors
#>              [,1]        [,2]        [,3]         [,4]         [,5]
#>  [1,] -0.07120467  0.01012126  0.10267024  0.009334937 -0.525477561
#>  [2,] -0.11957453 -0.01979722  0.01113523 -0.021358489 -0.487347576
#>  [3,] -0.23103580  0.44967656  0.04341691  0.013757640 -0.014003609
#>  [4,] -0.09918330  0.41742737 -0.02621735  0.008812775  0.193658527
#>  [5,] -0.16263344  0.53649871  0.04597711  0.016076055 -0.050209995
#>  [6,] -0.06405416  0.43027733  0.03357226  0.010141601  0.262293874
#>  [7,] -0.30215953 -0.15740790  0.33887851  0.003791875  0.254358523
#>  [8,] -0.28698476  0.15158750  0.16991676  0.002118812 -0.425962598
#>  [9,] -0.34563173 -0.15305620  0.31386895 -0.006024007 -0.012032057
#> [10,] -0.42222447 -0.17727834  0.06461320 -0.030309268  0.183369287
#> [11,] -0.27455501 -0.07053181 -0.37879139 -0.028696730 -0.042136364
#> [12,] -0.22043041 -0.01409189 -0.29502943 -0.066218375 -0.126762948
#> [13,] -0.25483400 -0.05720561 -0.43517739 -0.053156830  0.099220117
#> [14,] -0.18117271 -0.04862467 -0.13163708 -0.012954580 -0.169075688
#> [15,] -0.25874930 -0.06495181 -0.44670724 -0.010270936  0.110159477
#> [16,] -0.35165900 -0.17555336  0.31212097 -0.019326223  0.170266942
#> [17,]  0.03815949  0.02858195  0.03766455 -0.703433692  0.002757910
#> [18,]  0.04078750  0.02840667  0.03878262 -0.703103717  0.005126555
#>                [,6]         [,7]        [,8]         [,9]        [,10]
#>  [1,]  0.3740550852 -0.423176292  0.55790719 -0.001343398  0.088751158
#>  [2,] -0.1323159182 -0.358733669 -0.73221346 -0.144626050 -0.062533189
#>  [3,] -0.2808146857  0.206808241  0.14905443 -0.029679852  0.013772163
#>  [4,]  0.4385748056 -0.159282423 -0.08967282 -0.019287178 -0.701883140
#>  [5,] -0.1864947573  0.099179398  0.01373025 -0.010366237  0.034710802
#>  [6,]  0.2178734430 -0.361072072 -0.17236842  0.101241481  0.629258604
#>  [7,] -0.0194266751 -0.037931382 -0.02604581  0.022168648 -0.082077631
#>  [8,] -0.1591530419  0.337370889  0.08866743 -0.182309074  0.007879992
#>  [9,]  0.1062458161 -0.022935449 -0.02396641  0.041523858  0.045141729
#> [10,] -0.1045695031 -0.171778033  0.07369884 -0.049803882  0.022954043
#> [11,]  0.0454428573 -0.071478047  0.09998654 -0.256758258 -0.122274770
#> [12,] -0.2458334888 -0.129607636  0.03930357  0.851546367 -0.129469682
#> [13,] -0.0071149979 -0.016855469  0.02680305 -0.198245592  0.120006288
#> [14,]  0.6127957617  0.561426546 -0.24971525  0.222833230  0.183332998
#> [15,]  0.0007333593 -0.030947241  0.03457674 -0.212436912  0.093460244
#> [16,]  0.0540713789  0.007616781 -0.04294262  0.055584253 -0.019127205
#> [17,]  0.0102996065  0.008827023  0.01003874 -0.026388623  0.002723661
#> [18,]  0.0096234482  0.010267770  0.01184920 -0.024722868  0.003793709
#>              [,11]        [,12]        [,13]        [,14]        [,15]
#>  [1,] -0.132087739 -0.214886848 -0.006797917 -0.050105404  0.020842557
#>  [2,] -0.054722927 -0.160004642 -0.001628660 -0.083847205 -0.004566956
#>  [3,] -0.002692876 -0.286489090  0.033669271 -0.379875565 -0.121698219
#>  [4,] -0.117046673  0.156561236 -0.014931834  0.056262318  0.044442241
#>  [5,] -0.005033891 -0.176119175 -0.019018502 -0.064952907 -0.112546123
#>  [6,]  0.154273064  0.175425058  0.014055350  0.195143456  0.107290862
#>  [7,] -0.044502859 -0.515638984  0.043045264  0.594375842 -0.188506063
#>  [8,] -0.030042440  0.410574608 -0.037896199  0.479635901  0.270343377
#>  [9,]  0.007723806  0.486880565 -0.005564194 -0.207950755 -0.667013688
#> [10,]  0.039695092 -0.103797482 -0.054177738 -0.183680730  0.105369346
#> [11,]  0.784927569 -0.015432511  0.162483405  0.047600427 -0.021537656
#> [12,]  0.013761805  0.086371214 -0.002828309  0.112086840  0.024794779
#> [13,] -0.502741712  0.069886534  0.639411739  0.035829513 -0.022302884
#> [14,]  0.034836877 -0.244083649 -0.002839250 -0.071023598 -0.019304714
#> [15,] -0.258299478  0.006014077 -0.745331855  0.048045294 -0.042695955
#> [16,] -0.022392798  0.072500451  0.011518578 -0.341885090  0.624521445
#> [17,]  0.002990317 -0.008047350 -0.022648638  0.004548643 -0.009332673
#> [18,]  0.003822737 -0.006693515 -0.018724172  0.001079697 -0.008083642
#>              [,16]        [,17]          [,18]
#>  [1,]  0.021207248 -0.053595520  0.00107385980
#>  [2,] -0.058247072 -0.031811140  0.00256072411
#>  [3,] -0.570961846 -0.169995441 -0.00026586369
#>  [4,] -0.098460068  0.074877358  0.00081010975
#>  [5,]  0.763683036  0.056622327  0.00027672243
#>  [6,] -0.150113097 -0.007971532 -0.00010578489
#>  [7,] -0.034351868 -0.186890193  0.00249054209
#>  [8,] -0.127503475  0.117160140  0.00113223231
#>  [9,]  0.047431154 -0.121339126  0.00038714179
#> [10,] -0.047292538  0.800207910 -0.00207997688
#> [11,]  0.051185874 -0.165573712  0.00094644104
#> [12,] -0.016290419 -0.044091997  0.00021992106
#> [13,]  0.045145242 -0.068420366 -0.00046253290
#> [14,] -0.005860419  0.127034401  0.00006791429
#> [15,]  0.013993199 -0.185496219  0.00370216162
#> [16,]  0.166251083 -0.412322135 -0.00017979624
#> [17,] -0.001240937 -0.005618109 -0.70694104030
#> [18,] -0.001389102 -0.001657268  0.70724746617
# Step 3: transform the original data by projecting the standardised data onto
# the eigenvectors: data (103594 x 18) %*% eigenvectors (18 x 18).
# The result matches df_pc$x in magnitude; some columns have the opposite sign
# because the sign of an eigenvector is arbitrary.
df_pc_manual <- scale(df) %*% eig[["vectors"]]
head(df_pc_manual, n = 3)
#>           [,1]      [,2]       [,3]       [,4]       [,5]       [,6]      [,7]
#> [1,] -2.106698 -1.236612  0.2322703 -0.3423958 1.91270733 -0.2153185 1.4493111
#> [2,]  3.322792  1.241931 -1.2603804  0.5424145 0.08913361 -2.1738239 0.1840568
#> [3,] -2.128788 -1.926245  0.9123639  0.4258381 0.39599178 -0.2915697 1.0553938
#>            [,8]       [,9]         [,10]      [,11]      [,12]       [,13]
#> [1,] -0.2875312 -0.4884377 -1.3390422677 -0.1302104 -0.1410566 -0.60010621
#> [2,]  0.5860853  1.2514371  0.2195892726 -1.0153447  0.6478209 -0.81200696
#> [3,] -0.3578300 -0.5148965  0.0008271163  0.1700469  0.4501204  0.01314923
#>           [,14]      [,15]      [,16]       [,17]        [,18]
#> [1,] -0.3891295 -0.4518179  0.4413054 -0.05565799 -0.136282705
#> [2,]  0.4108619  0.2428415 -0.2189309 -0.17851657  0.081910451
#> [3,]  0.5884105  0.2076590  0.1160029  0.19549608 -0.001516704
# head(df_pc$x, 3)  # uncomment to compare against the prcomp() scores
# Dimensions of the manually computed score matrix.
dim(df_pc_manual)
#> [1] 103594     18
  • limitation: bagus untuk data yang variabel numeriknya saling berkorelasi
# Correlation heat map of the original variables, with the coefficient printed
# in each cell. `TRUE` is spelled out because `T` is an ordinary variable that
# can be reassigned.
ggcorr(df, label = TRUE)

  • membuat garis linier baru (PC) sejumlah variabel awal berdasarkan hasil pemecahan matriks variance-covariance
    • PC yang dihasilkan akan tegak lurus terhadap PC lainnya, sehingga antar PC tidak saling berkorelasi
# Correlation heat map of the PC scores, with the coefficient printed in each
# cell. `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
ggcorr(df_pc$x, label = TRUE)

- Karena antar PC tidak saling berkorelasi, maka setiap PC merangkum informasi yang berbeda dengan PC lainnya (jumlah informasi yang dirangkum oleh PC1 > PC2 > PC3 > PC4 dst)
- Tahapan reduksi dimensi: user menentukan akan menggunakan berapa PC berdasarkan kumulatif besar informasi yang dirangkum oleh setiap PC

# Per-component standard deviation, proportion of variance explained and
# cumulative proportion.
summary(df_pc)
#> Importance of components:
#>                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
#> Standard deviation     1.9649 1.5373 1.4763 1.4009 1.11586 0.98126 0.96350
#> Proportion of Variance 0.2145 0.1313 0.1211 0.1090 0.06917 0.05349 0.05157
#> Cumulative Proportion  0.2145 0.3458 0.4668 0.5759 0.64505 0.69855 0.75012
#>                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
#> Standard deviation     0.94414 0.83156 0.73070 0.69207 0.66848 0.60577 0.56852
#> Proportion of Variance 0.04952 0.03842 0.02966 0.02661 0.02483 0.02039 0.01796
#> Cumulative Proportion  0.79964 0.83806 0.86772 0.89433 0.91916 0.93954 0.95750
#>                           PC15    PC16    PC17    PC18
#> Standard deviation     0.54284 0.50071 0.43029 0.18572
#> Proportion of Variance 0.01637 0.01393 0.01029 0.00192
#> Cumulative Proportion  0.97387 0.98780 0.99808 1.00000

Setelah melakukan PCA (linier) pada data df, user memutuskan untuk mereduksi jumlah kolom dari 18 kolom menjadi hanya 11 PC (PC1 s.d. PC11), karena 11 PC tersebut sudah merangkum 89.43% informasi pada data awal

Keterangan:
- standard deviation: standar deviasi dari setiap PC. Jika standar deviasi dikuadratkan, maka akan diperoleh variansi (eigen value)
- Proportion of Variance: persentase/proporsi informasi yang dirangkum oleh setiap PC
- Cumulative Proportion: persentase/proporsi informasi kumulatif

# Plot the variance captured by each principal component (scree plot).
plot(df_pc)

Terdapat beberapa nilai yang dihasilkan oleh PCA yaitu :

  • $sdev: standar deviasi dari setiap PC. Jika standar deviasi dikuadratkan, maka akan diperoleh variansi (eigen value)
# Standard deviation of each PC; squaring it gives the variance (eigenvalue).
df_pc$sdev
#>  [1] 1.9648609 1.5372859 1.4762568 1.4009049 1.1158595 0.9812645 0.9635048
#>  [8] 0.9441350 0.8315633 0.7306999 0.6920672 0.6684823 0.6057737 0.5685219
#> [15] 0.5428383 0.5007069 0.4302944 0.1857177
  • $rotation: matrix transformasi berisi eigen vector untuk setiap PC
# Transformation matrix: one eigenvector (column) per PC, one row per variable.
df_pc$rotation
#>                                           PC1         PC2         PC3
#> Age                                0.07120467  0.01012126 -0.10267024
#> Flight.Distance                    0.11957453 -0.01979722 -0.01113523
#> Inflight.wifi.service              0.23103580  0.44967656 -0.04341691
#> Departure.Arrival.time.convenient  0.09918330  0.41742737  0.02621735
#> Ease.of.Online.booking             0.16263344  0.53649871 -0.04597711
#> Gate.location                      0.06405416  0.43027733 -0.03357226
#> Food.and.drink                     0.30215953 -0.15740790 -0.33887851
#> Online.boarding                    0.28698476  0.15158750 -0.16991676
#> Seat.comfort                       0.34563173 -0.15305620 -0.31386895
#> Inflight.entertainment             0.42222447 -0.17727834 -0.06461320
#> On.board.service                   0.27455501 -0.07053181  0.37879139
#> Leg.room.service                   0.22043041 -0.01409189  0.29502943
#> Baggage.handling                   0.25483400 -0.05720561  0.43517739
#> Checkin.service                    0.18117271 -0.04862467  0.13163708
#> Inflight.service                   0.25874930 -0.06495181  0.44670724
#> Cleanliness                        0.35165900 -0.17555336 -0.31212097
#> Departure.Delay.in.Minutes        -0.03815949  0.02858195 -0.03766455
#> Arrival.Delay.in.Minutes          -0.04078750  0.02840667 -0.03878262
#>                                            PC4          PC5           PC6
#> Age                                0.009334937 -0.525477561  0.3740550852
#> Flight.Distance                   -0.021358489 -0.487347576 -0.1323159182
#> Inflight.wifi.service              0.013757640 -0.014003609 -0.2808146857
#> Departure.Arrival.time.convenient  0.008812775  0.193658527  0.4385748056
#> Ease.of.Online.booking             0.016076055 -0.050209995 -0.1864947573
#> Gate.location                      0.010141601  0.262293874  0.2178734430
#> Food.and.drink                     0.003791875  0.254358523 -0.0194266751
#> Online.boarding                    0.002118812 -0.425962598 -0.1591530419
#> Seat.comfort                      -0.006024007 -0.012032057  0.1062458161
#> Inflight.entertainment            -0.030309268  0.183369287 -0.1045695031
#> On.board.service                  -0.028696730 -0.042136364  0.0454428573
#> Leg.room.service                  -0.066218375 -0.126762948 -0.2458334888
#> Baggage.handling                  -0.053156830  0.099220117 -0.0071149979
#> Checkin.service                   -0.012954580 -0.169075688  0.6127957617
#> Inflight.service                  -0.010270936  0.110159477  0.0007333593
#> Cleanliness                       -0.019326223  0.170266942  0.0540713789
#> Departure.Delay.in.Minutes        -0.703433692  0.002757910  0.0102996065
#> Arrival.Delay.in.Minutes          -0.703103717  0.005126555  0.0096234482
#>                                            PC7         PC8          PC9
#> Age                               -0.423176292  0.55790719 -0.001343398
#> Flight.Distance                   -0.358733669 -0.73221346 -0.144626050
#> Inflight.wifi.service              0.206808241  0.14905443 -0.029679852
#> Departure.Arrival.time.convenient -0.159282423 -0.08967282 -0.019287178
#> Ease.of.Online.booking             0.099179398  0.01373025 -0.010366237
#> Gate.location                     -0.361072072 -0.17236842  0.101241481
#> Food.and.drink                    -0.037931382 -0.02604581  0.022168648
#> Online.boarding                    0.337370889  0.08866743 -0.182309074
#> Seat.comfort                      -0.022935449 -0.02396641  0.041523858
#> Inflight.entertainment            -0.171778033  0.07369884 -0.049803882
#> On.board.service                  -0.071478047  0.09998654 -0.256758258
#> Leg.room.service                  -0.129607636  0.03930357  0.851546367
#> Baggage.handling                  -0.016855469  0.02680305 -0.198245592
#> Checkin.service                    0.561426546 -0.24971525  0.222833230
#> Inflight.service                  -0.030947241  0.03457674 -0.212436912
#> Cleanliness                        0.007616781 -0.04294262  0.055584253
#> Departure.Delay.in.Minutes         0.008827023  0.01003874 -0.026388623
#> Arrival.Delay.in.Minutes           0.010267770  0.01184920 -0.024722868
#>                                           PC10         PC11         PC12
#> Age                                0.088751158  0.132087739 -0.214886848
#> Flight.Distance                   -0.062533189  0.054722927 -0.160004642
#> Inflight.wifi.service              0.013772163  0.002692876 -0.286489090
#> Departure.Arrival.time.convenient -0.701883140  0.117046673  0.156561236
#> Ease.of.Online.booking             0.034710802  0.005033891 -0.176119175
#> Gate.location                      0.629258604 -0.154273064  0.175425058
#> Food.and.drink                    -0.082077631  0.044502859 -0.515638984
#> Online.boarding                    0.007879992  0.030042440  0.410574608
#> Seat.comfort                       0.045141729 -0.007723806  0.486880565
#> Inflight.entertainment             0.022954043 -0.039695092 -0.103797482
#> On.board.service                  -0.122274770 -0.784927569 -0.015432511
#> Leg.room.service                  -0.129469682 -0.013761805  0.086371214
#> Baggage.handling                   0.120006288  0.502741712  0.069886534
#> Checkin.service                    0.183332998 -0.034836877 -0.244083649
#> Inflight.service                   0.093460244  0.258299478  0.006014077
#> Cleanliness                       -0.019127205  0.022392798  0.072500451
#> Departure.Delay.in.Minutes         0.002723661 -0.002990317 -0.008047350
#> Arrival.Delay.in.Minutes           0.003793709 -0.003822737 -0.006693515
#>                                           PC13         PC14         PC15
#> Age                                0.006797917 -0.050105404 -0.020842557
#> Flight.Distance                    0.001628660 -0.083847205  0.004566956
#> Inflight.wifi.service             -0.033669271 -0.379875565  0.121698219
#> Departure.Arrival.time.convenient  0.014931834  0.056262318 -0.044442241
#> Ease.of.Online.booking             0.019018502 -0.064952907  0.112546123
#> Gate.location                     -0.014055350  0.195143456 -0.107290862
#> Food.and.drink                    -0.043045264  0.594375842  0.188506063
#> Online.boarding                    0.037896199  0.479635901 -0.270343377
#> Seat.comfort                       0.005564194 -0.207950755  0.667013688
#> Inflight.entertainment             0.054177738 -0.183680730 -0.105369346
#> On.board.service                  -0.162483405  0.047600427  0.021537656
#> Leg.room.service                   0.002828309  0.112086840 -0.024794779
#> Baggage.handling                  -0.639411739  0.035829513  0.022302884
#> Checkin.service                    0.002839250 -0.071023598  0.019304714
#> Inflight.service                   0.745331855  0.048045294  0.042695955
#> Cleanliness                       -0.011518578 -0.341885090 -0.624521445
#> Departure.Delay.in.Minutes         0.022648638  0.004548643  0.009332673
#> Arrival.Delay.in.Minutes           0.018724172  0.001079697  0.008083642
#>                                           PC16         PC17           PC18
#> Age                               -0.021207248  0.053595520  0.00107385980
#> Flight.Distance                    0.058247072  0.031811140  0.00256072411
#> Inflight.wifi.service              0.570961846  0.169995441 -0.00026586369
#> Departure.Arrival.time.convenient  0.098460068 -0.074877358  0.00081010975
#> Ease.of.Online.booking            -0.763683036 -0.056622327  0.00027672243
#> Gate.location                      0.150113097  0.007971532 -0.00010578489
#> Food.and.drink                     0.034351868  0.186890193  0.00249054209
#> Online.boarding                    0.127503475 -0.117160140  0.00113223231
#> Seat.comfort                      -0.047431154  0.121339126  0.00038714179
#> Inflight.entertainment             0.047292538 -0.800207910 -0.00207997688
#> On.board.service                  -0.051185874  0.165573712  0.00094644104
#> Leg.room.service                   0.016290419  0.044091997  0.00021992106
#> Baggage.handling                  -0.045145242  0.068420366 -0.00046253290
#> Checkin.service                    0.005860419 -0.127034401  0.00006791429
#> Inflight.service                  -0.013993199  0.185496219  0.00370216162
#> Cleanliness                       -0.166251083  0.412322135 -0.00017979624
#> Departure.Delay.in.Minutes         0.001240937  0.005618109 -0.70694104030
#> Arrival.Delay.in.Minutes           0.001389102  0.001657268  0.70724746617
  • pca$x: hasil transformasi data berupa nilai pada setiap PC
# Preview the PC scores: one row per observation, one column per principal component.
head(df_pc[["x"]])
#>             PC1        PC2        PC3        PC4         PC5         PC6
#> [1,]  2.1066977 -1.2366121 -0.2322703 -0.3423958  1.91270733 -0.21531852
#> [2,] -3.3227916  1.2419314  1.2603804  0.5424145  0.08913361 -2.17382386
#> [3,]  2.1287884 -1.9262455 -0.9123639  0.4258381  0.39599178 -0.29156972
#> [4,] -1.8683588  2.4997439  0.8505465  0.2791634  1.39317601 -0.88331381
#> [5,]  0.7822565  0.2378688 -0.9405956  0.5658857 -0.83136070  0.19311936
#> [6,] -2.5926326  0.3342038  2.5400675  0.5629496 -0.49924921 -0.08667536
#>              PC7        PC8        PC9          PC10       PC11       PC12
#> [1,]  1.44931115 -0.2875312 -0.4884377 -1.3390422677  0.1302104 -0.1410566
#> [2,]  0.18405680  0.5860853  1.2514371  0.2195892726  1.0153447  0.6478209
#> [3,]  1.05539377 -0.3578300 -0.5148965  0.0008271163 -0.1700469  0.4501204
#> [4,] -1.29735584 -0.1925489  1.3121993 -0.2818494168  0.4094772  0.7816546
#> [5,]  0.06304966  1.6385823  0.4779975  0.1605988085  0.4451393  0.6981710
#> [6,]  1.15404456 -0.4402043  0.3920498 -1.3255796236  0.5806908 -0.3735397
#>             PC13       PC14       PC15        PC16         PC17         PC18
#> [1,]  0.60010621 -0.3891295  0.4518179 -0.44130536  0.055657989 -0.136282705
#> [2,]  0.81200696  0.4108619 -0.2428415  0.21893094  0.178516568  0.081910451
#> [3,] -0.01314923  0.5884105 -0.2076590 -0.11600291 -0.195496079 -0.001516704
#> [4,]  0.72934624  0.5665909 -0.1147616 -1.09025791  0.008636018 -0.045501138
#> [5,] -0.55121182  0.8205929  0.6798394  0.06279077  0.222871217 -0.003215653
#> [6,]  0.02134649 -0.3622343  0.1050978  0.50184920  0.208720428 -0.005437314
  • Other output
# Centering vector stored by prcomp(). The printed values are ~0, so the data
# handed to prcomp() was presumably centered already — TODO confirm upstream.
df_pc[["center"]]
#>                               Age                   Flight.Distance 
#>       -0.000000000000000012743856        0.000000000000000010592272 
#>             Inflight.wifi.service Departure.Arrival.time.convenient 
#>       -0.000000000000000031249873       -0.000000000000000095024546 
#>            Ease.of.Online.booking                     Gate.location 
#>       -0.000000000000000102860190        0.000000000000000182280263 
#>                    Food.and.drink                   Online.boarding 
#>       -0.000000000000000156509252        0.000000000000000116666978 
#>                      Seat.comfort            Inflight.entertainment 
#>        0.000000000000000042212353       -0.000000000000000103036486 
#>                  On.board.service                  Leg.room.service 
#>       -0.000000000000000086993048       -0.000000000000000165991171 
#>                  Baggage.handling                   Checkin.service 
#>        0.000000000000000020190939        0.000000000000000052605756 
#>                  Inflight.service                       Cleanliness 
#>        0.000000000000000135449695        0.000000000000000001535219 
#>        Departure.Delay.in.Minutes          Arrival.Delay.in.Minutes 
#>        0.000000000000000028245630        0.000000000000000033673075
# Scaling vector stored by prcomp(): the per-variable standard deviation.
df_pc[["scale"]]
#>                               Age                   Flight.Distance 
#>                         15.113125                        997.297235 
#>             Inflight.wifi.service Departure.Arrival.time.convenient 
#>                          1.327866                          1.525233 
#>            Ease.of.Online.booking                     Gate.location 
#>                          1.398934                          1.277723 
#>                    Food.and.drink                   Online.boarding 
#>                          1.329401                          1.349433 
#>                      Seat.comfort            Inflight.entertainment 
#>                          1.318896                          1.333030 
#>                  On.board.service                  Leg.room.service 
#>                          1.288284                          1.315409 
#>                  Baggage.handling                   Checkin.service 
#>                          1.181051                          1.265396 
#>                  Inflight.service                       Cleanliness 
#>                          1.175603                          1.312194 
#>        Departure.Delay.in.Minutes          Arrival.Delay.in.Minutes 
#>                         38.116737                         38.698682

Mengambil hasil PCA dan menggabungkan kembali dengan variabel kategorik/factor pada data awal
Ambil hasil transformasi data (PCA)

# Keep only the scores of the first 11 principal components, as a data frame.
pc_keep <- as.data.frame(df_pc$x[, 1:11])

head(pc_keep)
#>          PC1        PC2        PC3        PC4         PC5         PC6
#> 1  2.1066977 -1.2366121 -0.2322703 -0.3423958  1.91270733 -0.21531852
#> 2 -3.3227916  1.2419314  1.2603804  0.5424145  0.08913361 -2.17382386
#> 3  2.1287884 -1.9262455 -0.9123639  0.4258381  0.39599178 -0.29156972
#> 4 -1.8683588  2.4997439  0.8505465  0.2791634  1.39317601 -0.88331381
#> 5  0.7822565  0.2378688 -0.9405956  0.5658857 -0.83136070  0.19311936
#> 6 -2.5926326  0.3342038  2.5400675  0.5629496 -0.49924921 -0.08667536
#>           PC7        PC8        PC9          PC10       PC11
#> 1  1.44931115 -0.2875312 -0.4884377 -1.3390422677  0.1302104
#> 2  0.18405680  0.5860853  1.2514371  0.2195892726  1.0153447
#> 3  1.05539377 -0.3578300 -0.5148965  0.0008271163 -0.1700469
#> 4 -1.29735584 -0.1925489  1.3121993 -0.2818494168  0.4094772
#> 5  0.06304966  1.6385823  0.4779975  0.1605988085  0.4451393
#> 6  1.15404456 -0.4402043  0.3920498 -1.3255796236  0.5806908

Gabungkan dengan data semula

# Names of the columns of `df`, i.e. the variables that were used for the PCA.
mycols <- colnames(df)
# Positions of those columns inside the full `airplane` data frame.
colsnumber <- match(mycols, names(airplane))
airplane_pca <- airplane %>%
  # Drop every column that went into the PCA. all_of() marks `colsnumber` as an
  # external vector, which avoids the tidyselect ambiguity/deprecation warning
  # raised when a bare external vector is used inside select().
  select(-all_of(colsnumber)) %>%
  # Attach the retained PC scores next to the remaining columns.
  bind_cols(pc_keep)

head(airplane_pca)
#>   Gender     Customer.Type  Type.of.Travel    Class            satisfaction
#> 1   Male    Loyal Customer Personal Travel Eco Plus neutral or dissatisfied
#> 2   Male disloyal Customer Business travel Business neutral or dissatisfied
#> 3 Female    Loyal Customer Business travel Business               satisfied
#> 4 Female    Loyal Customer Business travel Business neutral or dissatisfied
#> 5   Male    Loyal Customer Business travel Business               satisfied
#> 6 Female    Loyal Customer Personal Travel      Eco neutral or dissatisfied
#>          PC1        PC2        PC3        PC4         PC5         PC6
#> 1  2.1066977 -1.2366121 -0.2322703 -0.3423958  1.91270733 -0.21531852
#> 2 -3.3227916  1.2419314  1.2603804  0.5424145  0.08913361 -2.17382386
#> 3  2.1287884 -1.9262455 -0.9123639  0.4258381  0.39599178 -0.29156972
#> 4 -1.8683588  2.4997439  0.8505465  0.2791634  1.39317601 -0.88331381
#> 5  0.7822565  0.2378688 -0.9405956  0.5658857 -0.83136070  0.19311936
#> 6 -2.5926326  0.3342038  2.5400675  0.5629496 -0.49924921 -0.08667536
#>           PC7        PC8        PC9          PC10       PC11
#> 1  1.44931115 -0.2875312 -0.4884377 -1.3390422677  0.1302104
#> 2  0.18405680  0.5860853  1.2514371  0.2195892726  1.0153447
#> 3  1.05539377 -0.3578300 -0.5148965  0.0008271163 -0.1700469
#> 4 -1.29735584 -0.1925489  1.3121993 -0.2818494168  0.4094772
#> 5  0.06304966  1.6385823  0.4779975  0.1605988085  0.4451393
#> 6  1.15404456 -0.4402043  0.3920498 -1.3255796236  0.5806908
# Correlation plot of the variables in `df` (input of the PCA).
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
ggcorr(df, label = TRUE)

# Correlation plot of the PC scores, for comparison with the plot above.
ggcorr(df_pc$x, label = TRUE)

> Untuk melihat variabel yang paling berkontribusi/yang paling banyak dirangkum informasinya oleh setiap PC, kita dapat melihat nilai absolut dari eigen vector/matriks rotasi

# Loadings of the first principal component (first column of the rotation matrix).
df_pc$rotation[, "PC1"]
#>                               Age                   Flight.Distance 
#>                        0.07120467                        0.11957453 
#>             Inflight.wifi.service Departure.Arrival.time.convenient 
#>                        0.23103580                        0.09918330 
#>            Ease.of.Online.booking                     Gate.location 
#>                        0.16263344                        0.06405416 
#>                    Food.and.drink                   Online.boarding 
#>                        0.30215953                        0.28698476 
#>                      Seat.comfort            Inflight.entertainment 
#>                        0.34563173                        0.42222447 
#>                  On.board.service                  Leg.room.service 
#>                        0.27455501                        0.22043041 
#>                  Baggage.handling                   Checkin.service 
#>                        0.25483400                        0.18117271 
#>                  Inflight.service                       Cleanliness 
#>                        0.25874930                        0.35165900 
#>        Departure.Delay.in.Minutes          Arrival.Delay.in.Minutes 
#>                       -0.03815949                       -0.04078750

Variabel yang paling berkontribusi (paling banyak dirangkum) oleh PC1 adalah Inflight.entertainment dengan nilai loading absolut terbesar, yaitu 0.4222 (kontribusi = loading² ≈ 17.8%).

Selain mereduksi dimensi PCA dapat digunakan untuk memvisualisasikan high dimensionality data menggunakan biplot

Kita akan membuat visualisasi high dimensional data dari 100 observasi pertama.

# Work on a small subset: the first 100 rows of `df`.
airplane_small <- df %>% head(100)

melakukan PCA

# Standardize the subset with scale(), then run prcomp() on the result.
airplane_pca_small <- prcomp(scale(airplane_small))
# Standard deviation and (cumulative) proportion of variance for each PC.
summary(airplane_pca_small)
#> Importance of components:
#>                           PC1    PC2    PC3    PC4     PC5    PC6     PC7
#> Standard deviation     1.9679 1.7177 1.5294 1.4020 1.10002 0.9995 0.96817
#> Proportion of Variance 0.2152 0.1639 0.1299 0.1092 0.06723 0.0555 0.05208
#> Cumulative Proportion  0.2152 0.3791 0.5090 0.6182 0.68544 0.7409 0.79302
#>                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
#> Standard deviation     0.87960 0.83053 0.67770 0.64973 0.60039 0.54532 0.49928
#> Proportion of Variance 0.04298 0.03832 0.02552 0.02345 0.02003 0.01652 0.01385
#> Cumulative Proportion  0.83600 0.87432 0.89984 0.92329 0.94332 0.95984 0.97369
#>                           PC15    PC16    PC17    PC18
#> Standard deviation     0.40975 0.38612 0.31082 0.24505
#> Proportion of Variance 0.00933 0.00828 0.00537 0.00334
#> Cumulative Proportion  0.98301 0.99130 0.99666 1.00000

membuat biplot

# Biplot of the 100-row PCA.
biplot(x = airplane_pca_small)

5 observasi yang merupakan outlier! - - - - -

PCA using FactoMineR

FactoMineR adalah package yang dibuat untuk melakukan eksplorasi data secara multivariat.

# Preview the data frame that will be prepared for FactoMineR.
airplane %>% head()
#>   Gender     Customer.Type Age  Type.of.Travel    Class Flight.Distance
#> 1   Male    Loyal Customer  13 Personal Travel Eco Plus             460
#> 2   Male disloyal Customer  25 Business travel Business             235
#> 3 Female    Loyal Customer  26 Business travel Business            1142
#> 4 Female    Loyal Customer  25 Business travel Business             562
#> 5   Male    Loyal Customer  61 Business travel Business             214
#> 6 Female    Loyal Customer  26 Personal Travel      Eco            1180
#>   Inflight.wifi.service Departure.Arrival.time.convenient
#> 1                     3                                 4
#> 2                     3                                 2
#> 3                     2                                 2
#> 4                     2                                 5
#> 5                     3                                 3
#> 6                     3                                 4
#>   Ease.of.Online.booking Gate.location Food.and.drink Online.boarding
#> 1                      3             1              5               3
#> 2                      3             3              1               3
#> 3                      2             2              5               5
#> 4                      5             5              2               2
#> 5                      3             3              4               5
#> 6                      2             1              1               2
#>   Seat.comfort Inflight.entertainment On.board.service Leg.room.service
#> 1            5                      5                4                3
#> 2            1                      1                1                5
#> 3            5                      5                4                3
#> 4            2                      2                2                5
#> 5            5                      3                3                4
#> 6            1                      1                3                4
#>   Baggage.handling Checkin.service Inflight.service Cleanliness
#> 1                4               4                5           5
#> 2                3               1                4           1
#> 3                4               4                4           5
#> 4                3               1                4           2
#> 5                4               3                3           3
#> 6                4               4                4           1
#>   Departure.Delay.in.Minutes Arrival.Delay.in.Minutes            satisfaction
#> 1                         25                       18 neutral or dissatisfied
#> 2                          1                        6 neutral or dissatisfied
#> 3                          0                        0               satisfied
#> 4                         11                        9 neutral or dissatisfied
#> 5                          0                        0               satisfied
#> 6                          0                        0 neutral or dissatisfied
# Distinct values present in the two columns that are converted to factors below.
unique(airplane[["Inflight.service"]])
#> [1] 5 4 3 1 2 0
unique(airplane[["Inflight.entertainment"]])
#> [1] 5 1 2 3 4 0
# Build the FactoMineR input: integer columns only, with two rating columns
# recoded as factors and the satisfaction label appended as the last column.
# NOTE: later steps (`quali.sup`, `habillage`) refer to these columns by
# position, so the column order produced here matters.
airplane_facto <- airplane %>%
  # select_if() is superseded; select(where()) is the current equivalent.
  select(where(is.integer)) %>%
  mutate(
    Inflight.service = as.factor(Inflight.service),
    Inflight.entertainment = as.factor(Inflight.entertainment),
    # `satisfaction` is not an integer column, so it was dropped by the
    # select() above and is re-attached here from the source data frame.
    satisfaction_score = airplane$satisfaction
  )

head(airplane_facto)
#>   Age Flight.Distance Inflight.wifi.service Departure.Arrival.time.convenient
#> 1  13             460                     3                                 4
#> 2  25             235                     3                                 2
#> 3  26            1142                     2                                 2
#> 4  25             562                     2                                 5
#> 5  61             214                     3                                 3
#> 6  26            1180                     3                                 4
#>   Ease.of.Online.booking Gate.location Food.and.drink Online.boarding
#> 1                      3             1              5               3
#> 2                      3             3              1               3
#> 3                      2             2              5               5
#> 4                      5             5              2               2
#> 5                      3             3              4               5
#> 6                      2             1              1               2
#>   Seat.comfort Inflight.entertainment On.board.service Leg.room.service
#> 1            5                      5                4                3
#> 2            1                      1                1                5
#> 3            5                      5                4                3
#> 4            2                      2                2                5
#> 5            5                      3                3                4
#> 6            1                      1                3                4
#>   Baggage.handling Checkin.service Inflight.service Cleanliness
#> 1                4               4                5           5
#> 2                3               1                4           1
#> 3                4               4                4           5
#> 4                3               1                4           2
#> 5                4               3                3           3
#> 6                4               4                4           1
#>   Departure.Delay.in.Minutes      satisfaction_score
#> 1                         25 neutral or dissatisfied
#> 2                          1 neutral or dissatisfied
#> 3                          0               satisfied
#> 4                         11 neutral or dissatisfied
#> 5                          0               satisfied
#> 6                          0 neutral or dissatisfied

FactoMineR menyediakan fungsi untuk membuat PCA yaitu PCA() dan memvisualisasikan PCA yaitu plot.PCA(). Visualisasi PCA dengan FactoMineR dapat melibatkan variabel kategorik untuk membedakan informasi kategori dengan cara memberikan warna pada titik-titik amatan sehingga plot lebih informatif.

1. Create PCA

# Column positions of the categorical (supplementary) variables, looked up by
# name so they stay correct if the column order of `airplane_facto` changes.
qualivar <- match(
  c("Inflight.entertainment", "Inflight.service", "satisfaction_score"),
  names(airplane_facto)
)

Notes:

# Run the PCA with FactoMineR.
library(FactoMineR)

airplane_pca_facto <- PCA(
  X = airplane_facto,   # input data
  scale.unit = TRUE,    # scale the variables to unit variance first
  quali.sup = qualivar, # positions of the categorical (supplementary) columns
  graph = FALSE,        # no plot here; plotting is done with plot.PCA()
  ncp = 11              # number of PCs kept in the result (default is 5)
)

2. Visualisasi PCA menggunakan plot.PCA()

plot.PCA(airplane_pca_facto, # PCA result object
         choix = "ind", # plot the individuals (observations)
         select = "contrib3", # restrict labelling to the 3 top-contributing individuals, per FactoMineR's `select` syntax. NOTE(review): the original comment and the surrounding text speak of 5 outliers — confirm whether "contrib5" was intended
         habillage = 18, # color the points by categorical column 18 (satisfaction_score)
         invisible = "quali") # hide the category labels (they clutter the plot)

Dapat dilihat persebaran data tentang satisfaction_score menggunakan scatter plot. Dapat dilihat bahwa persebaran customer satisfied dominan ke arah kanan atas dan kanan bawah dari PCA Graph of Individuals.
Selanjutnya kita lanjutkan menampilkan PCA Graph of Variables agar dapat diketahui ada variabel apa saja yang berada di daerah kanan atas dan kanan bawah untuk mengetahui variable apa saja yang memiliki korelasi yang tinggi dalam menentukan customer satisfaction.

# Variables plot of the same PCA result.
plot.PCA(airplane_pca_facto, choix = "var")

Berdasarkan PCA graph of variables dapat kita tentukan bahwa yang menjadi faktor utama customer satisfaction adalah Cleanliness, Seat.comfort, dan Online.boarding. Untuk dataset yang digunakan, analisis tidak dapat dilanjutkan ke proses k-means, dikarenakan tidak adanya variable yang tidak berulang (special). Untuk dataset ini kita dapat mengambil insight business untuk mengetahui variable/faktor apa saja yang memiliki korelasi yang tinggi untuk mendapatkan customer satisfaction.