# import libs
library(tidyverse)
library(GGally)
library(FactoMineR)
library(factoextra)
# Read the training data from disk and preview its first rows.
airplane <- read.csv(file = "train.csv")
airplane %>% head()
#> X id Gender Customer.Type Age Type.of.Travel Class
#> 1 0 70172 Male Loyal Customer 13 Personal Travel Eco Plus
#> 2 1 5047 Male disloyal Customer 25 Business travel Business
#> 3 2 110028 Female Loyal Customer 26 Business travel Business
#> 4 3 24026 Female Loyal Customer 25 Business travel Business
#> 5 4 119299 Male Loyal Customer 61 Business travel Business
#> 6 5 111157 Female Loyal Customer 26 Personal Travel Eco
#> Flight.Distance Inflight.wifi.service Departure.Arrival.time.convenient
#> 1 460 3 4
#> 2 235 3 2
#> 3 1142 2 2
#> 4 562 2 5
#> 5 214 3 3
#> 6 1180 3 4
#> Ease.of.Online.booking Gate.location Food.and.drink Online.boarding
#> 1 3 1 5 3
#> 2 3 3 1 3
#> 3 2 2 5 5
#> 4 5 5 2 2
#> 5 3 3 4 5
#> 6 2 1 1 2
#> Seat.comfort Inflight.entertainment On.board.service Leg.room.service
#> 1 5 5 4 3
#> 2 1 1 1 5
#> 3 5 5 4 3
#> 4 2 2 2 5
#> 5 5 3 3 4
#> 6 1 1 3 4
#> Baggage.handling Checkin.service Inflight.service Cleanliness
#> 1 4 4 5 5
#> 2 3 1 4 1
#> 3 4 4 4 5
#> 4 3 1 4 2
#> 5 4 3 3 3
#> 6 4 4 4 1
#> Departure.Delay.in.Minutes Arrival.Delay.in.Minutes satisfaction
#> 1 25 18 neutral or dissatisfied
#> 2 1 6 neutral or dissatisfied
#> 3 0 0 satisfied
#> 4 11 9 neutral or dissatisfied
#> 5 0 0 satisfied
#> 6 0 0 neutral or dissatisfied
Gender: Gender of the passengers (Female, Male)
Customer Type: The customer type (Loyal customer, disloyal customer)
Age: The actual age of the passengers
Type of Travel: Purpose of the flight of the passengers (Personal Travel, Business Travel)
Class: Travel class in the plane of the passengers (Business, Eco, Eco Plus)
Flight distance: The flight distance of this journey
Inflight wifi service: Satisfaction level of the inflight wifi service (0:Not Applicable;1-5)
Departure/Arrival time convenient: Satisfaction level of Departure/Arrival time convenient
Ease of Online booking: Satisfaction level of online booking
Gate location: Satisfaction level of Gate location
Food and drink: Satisfaction level of Food and drink
Online boarding: Satisfaction level of online boarding
Seat comfort: Satisfaction level of Seat comfort
Inflight entertainment: Satisfaction level of inflight entertainment
On-board service: Satisfaction level of On-board service
Leg room service: Satisfaction level of Leg room service
Baggage handling: Satisfaction level of baggage handling
Check-in service: Satisfaction level of Check-in service
Inflight service: Satisfaction level of inflight service
Cleanliness: Satisfaction level of Cleanliness
Departure Delay in Minutes: Minutes delayed at departure
Arrival Delay in Minutes: Minutes delayed at arrival
Satisfaction: Airline satisfaction level (satisfied, neutral or dissatisfied)
# Inspect column names, types and a few sample values of the raw data.
airplane %>% glimpse()
#> Rows: 103,904
#> Columns: 25
#> $ X <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1…
#> $ id <int> 70172, 5047, 110028, 24026, 119299,…
#> $ Gender <chr> "Male", "Male", "Female", "Female",…
#> $ Customer.Type <chr> "Loyal Customer", "disloyal Custome…
#> $ Age <int> 13, 25, 26, 25, 61, 26, 47, 52, 41,…
#> $ Type.of.Travel <chr> "Personal Travel", "Business travel…
#> $ Class <chr> "Eco Plus", "Business", "Business",…
#> $ Flight.Distance <int> 460, 235, 1142, 562, 214, 1180, 127…
#> $ Inflight.wifi.service <int> 3, 3, 2, 2, 3, 3, 2, 4, 1, 3, 4, 2,…
#> $ Departure.Arrival.time.convenient <int> 4, 2, 2, 5, 3, 4, 4, 3, 2, 3, 5, 4,…
#> $ Ease.of.Online.booking <int> 3, 3, 2, 5, 3, 2, 2, 4, 2, 3, 5, 2,…
#> $ Gate.location <int> 1, 3, 2, 5, 3, 1, 3, 4, 2, 4, 4, 2,…
#> $ Food.and.drink <int> 5, 1, 5, 2, 4, 1, 2, 5, 4, 2, 2, 1,…
#> $ Online.boarding <int> 3, 3, 5, 2, 5, 2, 2, 5, 3, 3, 5, 2,…
#> $ Seat.comfort <int> 5, 1, 5, 2, 5, 1, 2, 5, 3, 3, 2, 1,…
#> $ Inflight.entertainment <int> 5, 1, 5, 2, 3, 1, 2, 5, 1, 2, 2, 1,…
#> $ On.board.service <int> 4, 1, 4, 2, 3, 3, 3, 5, 1, 2, 3, 1,…
#> $ Leg.room.service <int> 3, 5, 3, 5, 4, 4, 3, 5, 2, 3, 3, 2,…
#> $ Baggage.handling <int> 4, 3, 4, 3, 4, 4, 4, 5, 1, 4, 5, 5,…
#> $ Checkin.service <int> 4, 1, 4, 1, 3, 4, 3, 4, 4, 4, 3, 5,…
#> $ Inflight.service <int> 5, 4, 4, 4, 3, 4, 5, 5, 1, 3, 5, 5,…
#> $ Cleanliness <int> 5, 1, 5, 2, 3, 1, 2, 4, 2, 2, 2, 1,…
#> $ Departure.Delay.in.Minutes <int> 25, 1, 0, 11, 0, 0, 9, 4, 0, 0, 0, …
#> $ Arrival.Delay.in.Minutes <dbl> 18, 6, 0, 9, 0, 0, 23, 0, 0, 0, 0, …
#> $ satisfaction <chr> "neutral or dissatisfied", "neutral…
Membuang kolom yang tidak diperlukan dalam proses pengerjaan Unsupervised Learning:
- X
- id
# Drop the two columns that are not needed for the unsupervised-learning
# workflow: `X` (a running counter 0, 1, 2, ...) and `id`.
# The columns are removed by name rather than by position so that this step
# fails loudly, instead of silently dropping real features, if it is re-run on
# an already-trimmed data frame or if the column order of the CSV changes.
airplane <- airplane %>%
  select(-c(X, id))
glimpse(airplane)
#> Rows: 103,904
#> Columns: 23
#> $ Gender <chr> "Male", "Male", "Female", "Female",…
#> $ Customer.Type <chr> "Loyal Customer", "disloyal Custome…
#> $ Age <int> 13, 25, 26, 25, 61, 26, 47, 52, 41,…
#> $ Type.of.Travel <chr> "Personal Travel", "Business travel…
#> $ Class <chr> "Eco Plus", "Business", "Business",…
#> $ Flight.Distance <int> 460, 235, 1142, 562, 214, 1180, 127…
#> $ Inflight.wifi.service <int> 3, 3, 2, 2, 3, 3, 2, 4, 1, 3, 4, 2,…
#> $ Departure.Arrival.time.convenient <int> 4, 2, 2, 5, 3, 4, 4, 3, 2, 3, 5, 4,…
#> $ Ease.of.Online.booking <int> 3, 3, 2, 5, 3, 2, 2, 4, 2, 3, 5, 2,…
#> $ Gate.location <int> 1, 3, 2, 5, 3, 1, 3, 4, 2, 4, 4, 2,…
#> $ Food.and.drink <int> 5, 1, 5, 2, 4, 1, 2, 5, 4, 2, 2, 1,…
#> $ Online.boarding <int> 3, 3, 5, 2, 5, 2, 2, 5, 3, 3, 5, 2,…
#> $ Seat.comfort <int> 5, 1, 5, 2, 5, 1, 2, 5, 3, 3, 2, 1,…
#> $ Inflight.entertainment <int> 5, 1, 5, 2, 3, 1, 2, 5, 1, 2, 2, 1,…
#> $ On.board.service <int> 4, 1, 4, 2, 3, 3, 3, 5, 1, 2, 3, 1,…
#> $ Leg.room.service <int> 3, 5, 3, 5, 4, 4, 3, 5, 2, 3, 3, 2,…
#> $ Baggage.handling <int> 4, 3, 4, 3, 4, 4, 4, 5, 1, 4, 5, 5,…
#> $ Checkin.service <int> 4, 1, 4, 1, 3, 4, 3, 4, 4, 4, 3, 5,…
#> $ Inflight.service <int> 5, 4, 4, 4, 3, 4, 5, 5, 1, 3, 5, 5,…
#> $ Cleanliness <int> 5, 1, 5, 2, 3, 1, 2, 4, 2, 2, 2, 1,…
#> $ Departure.Delay.in.Minutes <int> 25, 1, 0, 11, 0, 0, 9, 4, 0, 0, 0, …
#> $ Arrival.Delay.in.Minutes <dbl> 18, 6, 0, 9, 0, 0, 23, 0, 0, 0, 0, …
#> $ satisfaction <chr> "neutral or dissatisfied", "neutral…
# Count the missing values in every column.
airplane %>%
  is.na() %>%
  colSums()
#> Gender Customer.Type
#> 0 0
#> Age Type.of.Travel
#> 0 0
#> Class Flight.Distance
#> 0 0
#> Inflight.wifi.service Departure.Arrival.time.convenient
#> 0 0
#> Ease.of.Online.booking Gate.location
#> 0 0
#> Food.and.drink Online.boarding
#> 0 0
#> Seat.comfort Inflight.entertainment
#> 0 0
#> On.board.service Leg.room.service
#> 0 0
#> Baggage.handling Checkin.service
#> 0 0
#> Inflight.service Cleanliness
#> 0 0
#> Departure.Delay.in.Minutes Arrival.Delay.in.Minutes
#> 0 310
#> satisfaction
#> 0
# Remove every row that contains at least one NA, then re-count the missing
# values per column to confirm that none are left.
airplane <- airplane %>%
  drop_na()
airplane %>%
  is.na() %>%
  colSums()
#> Gender Customer.Type
#> 0 0
#> Age Type.of.Travel
#> 0 0
#> Class Flight.Distance
#> 0 0
#> Inflight.wifi.service Departure.Arrival.time.convenient
#> 0 0
#> Ease.of.Online.booking Gate.location
#> 0 0
#> Food.and.drink Online.boarding
#> 0 0
#> Seat.comfort Inflight.entertainment
#> 0 0
#> On.board.service Leg.room.service
#> 0 0
#> Baggage.handling Checkin.service
#> 0 0
#> Inflight.service Cleanliness
#> 0 0
#> Departure.Delay.in.Minutes Arrival.Delay.in.Minutes
#> 0 0
#> satisfaction
#> 0
# Variance-covariance matrix of all numeric columns (variances on the
# diagonal). The variables live on very different scales, which is why the
# data is standardized with scale() before PCA further below.
# `select(where(is.numeric))` replaces the superseded `select_if()`.
airplane %>%
  select(where(is.numeric)) %>%
  var()
#> Age Flight.Distance
#> Age 228.40655557 1504.779961
#> Flight.Distance 1504.77996080 994601.774592
#> Inflight.wifi.service 0.35058863 9.336387
#> Departure.Arrival.time.convenient 0.87680615 -30.282684
#> Ease.of.Online.booking 0.51717040 91.657110
#> Gate.location -0.03008399 6.030465
#> Food.and.drink 0.46049850 75.513602
#> Online.boarding 4.25587545 289.600340
#> Seat.comfort 3.19523624 207.186796
#> Inflight.entertainment 1.53876555 171.023591
#> On.board.service 1.11218758 140.737311
#> Leg.room.service 0.80509199 175.577729
#> Baggage.handling -0.84997363 74.467137
#> Checkin.service 0.66940264 92.406337
#> Inflight.service -0.88656527 67.332932
#> Cleanliness 1.06084342 121.862023
#> Departure.Delay.in.Minutes -5.84723167 72.444795
#> Arrival.Delay.in.Minutes -7.10419069 -93.621977
#> Inflight.wifi.service
#> Age 0.3505886
#> Flight.Distance 9.3363872
#> Inflight.wifi.service 1.7632288
#> Departure.Arrival.time.convenient 0.6962152
#> Ease.of.Online.booking 1.3297571
#> Gate.location 0.5702877
#> Food.and.drink 0.2376098
#> Online.boarding 0.8188866
#> Seat.comfort 0.2147409
#> Inflight.entertainment 0.3708563
#> On.board.service 0.2078183
#> Leg.room.service 0.2803164
#> Baggage.handling 0.1898556
#> Checkin.service 0.0725514
#> Inflight.service 0.1726926
#> Cleanliness 0.2311350
#> Departure.Delay.in.Minutes -0.8832421
#> Arrival.Delay.in.Minutes -0.9812316
#> Departure.Arrival.time.convenient
#> Age 0.876806148
#> Flight.Distance -30.282683759
#> Inflight.wifi.service 0.696215181
#> Departure.Arrival.time.convenient 2.326335348
#> Ease.of.Online.booking 0.932471611
#> Gate.location 0.866448429
#> Food.and.drink 0.010521659
#> Online.boarding 0.144053226
#> Seat.comfort 0.022963941
#> Inflight.entertainment -0.009521065
#> On.board.service 0.134802944
#> Leg.room.service 0.025000553
#> Baggage.handling 0.129520133
#> Checkin.service 0.180126996
#> Inflight.service 0.131301374
#> Cleanliness 0.028693693
#> Departure.Delay.in.Minutes 0.045966689
#> Arrival.Delay.in.Minutes -0.051008212
#> Ease.of.Online.booking Gate.location
#> Age 0.51717040 -0.030083990
#> Flight.Distance 91.65710971 6.030465199
#> Inflight.wifi.service 1.32975707 0.570287678
#> Departure.Arrival.time.convenient 0.93247161 0.866448429
#> Ease.of.Online.booking 1.95701517 0.819984187
#> Gate.location 0.81998419 1.632574799
#> Food.and.drink 0.05940085 -0.001987982
#> Online.boarding 0.76283403 0.002501937
#> Seat.comfort 0.05539062 0.005701547
#> Inflight.entertainment 0.08799089 0.006070411
#> On.board.service 0.06985204 -0.046966402
#> Leg.room.service 0.19769204 -0.009862311
#> Baggage.handling 0.06419087 0.003652906
#> Checkin.service 0.01939632 -0.057317696
#> Inflight.service 0.05810392 0.002616116
#> Cleanliness 0.02972249 -0.006731878
#> Departure.Delay.in.Minutes -0.33550176 0.269459278
#> Arrival.Delay.in.Minutes -0.43220849 0.254295699
#> Food.and.drink Online.boarding Seat.comfort
#> Age 0.460498503 4.255875455 3.195236243
#> Flight.Distance 75.513602178 289.600339811 207.186796038
#> Inflight.wifi.service 0.237609766 0.818886582 0.214740926
#> Departure.Arrival.time.convenient 0.010521659 0.144053226 0.022963941
#> Ease.of.Online.booking 0.059400849 0.762834027 0.055390616
#> Gate.location -0.001987982 0.002501937 0.005701547
#> Food.and.drink 1.767307559 0.420663950 1.007401695
#> Online.boarding 0.420663950 1.820968592 0.747619068
#> Seat.comfort 1.007401695 0.747619068 1.739486574
#> Inflight.entertainment 1.102928704 0.513017727 1.073538262
#> On.board.service 0.101044950 0.270059711 0.224334466
#> Leg.room.service 0.056683575 0.219717093 0.182939389
#> Baggage.handling 0.054657192 0.132757214 0.116129738
#> Checkin.service 0.146446074 0.348699501 0.319673852
#> Inflight.service 0.053257502 0.118012325 0.107283783
#> Cleanliness 1.147221679 0.586989474 1.174206912
#> Departure.Delay.in.Minutes -1.519302426 -0.952342522 -1.373566363
#> Arrival.Delay.in.Minutes -1.673243800 -1.146184606 -1.526088128
#> Inflight.entertainment On.board.service
#> Age 1.538765547 1.11218758
#> Flight.Distance 171.023590810 140.73731125
#> Inflight.wifi.service 0.370856339 0.20781831
#> Departure.Arrival.time.convenient -0.009521065 0.13480294
#> Ease.of.Online.booking 0.087990889 0.06985204
#> Gate.location 0.006070411 -0.04696640
#> Food.and.drink 1.102928704 0.10104495
#> Online.boarding 0.513017727 0.27005971
#> Seat.comfort 1.073538262 0.22433447
#> Inflight.entertainment 1.776970040 0.72188070
#> On.board.service 0.721880702 1.65967689
#> Leg.room.service 0.525781710 0.60270369
#> Baggage.handling 0.595682229 0.79005770
#> Checkin.service 0.203787271 0.39752558
#> Inflight.service 0.635068667 0.83407948
#> Cleanliness 1.209979192 0.20832848
#> Departure.Delay.in.Minutes -1.406996555 -1.54555147
#> Arrival.Delay.in.Minutes -1.583851186 -1.75622960
#> Leg.room.service Baggage.handling
#> Age 0.805091989 -0.849973626
#> Flight.Distance 175.577728835 74.467136690
#> Inflight.wifi.service 0.280316362 0.189855626
#> Departure.Arrival.time.convenient 0.025000553 0.129520133
#> Ease.of.Online.booking 0.197692041 0.064190866
#> Gate.location -0.009862311 0.003652906
#> Food.and.drink 0.056683575 0.054657192
#> Online.boarding 0.219717093 0.132757214
#> Seat.comfort 0.182939389 0.116129738
#> Inflight.entertainment 0.525781710 0.595682229
#> On.board.service 0.602703691 0.790057703
#> Leg.room.service 1.730299941 0.574311896
#> Baggage.handling 0.574311896 1.394882123
#> Checkin.service 0.254802378 0.348705323
#> Inflight.service 0.570505493 0.873256280
#> Cleanliness 0.166394388 0.148441242
#> Departure.Delay.in.Minutes 0.718810866 -0.255841464
#> Arrival.Delay.in.Minutes 0.602874458 -0.390398284
#> Checkin.service Inflight.service
#> Age 0.66940264 -0.886565269
#> Flight.Distance 92.40633705 67.332931772
#> Inflight.wifi.service 0.07255140 0.172692617
#> Departure.Arrival.time.convenient 0.18012700 0.131301374
#> Ease.of.Online.booking 0.01939632 0.058103920
#> Gate.location -0.05731770 0.002616116
#> Food.and.drink 0.14644607 0.053257502
#> Online.boarding 0.34869950 0.118012325
#> Seat.comfort 0.31967385 0.107283783
#> Inflight.entertainment 0.20378727 0.635068667
#> On.board.service 0.39752558 0.834079475
#> Leg.room.service 0.25480238 0.570505493
#> Baggage.handling 0.34870532 0.873256280
#> Checkin.service 1.60122715 0.352942450
#> Inflight.service 0.35294245 1.382042439
#> Cleanliness 0.29793569 0.137125679
#> Departure.Delay.in.Minutes -0.87133809 -2.439985786
#> Arrival.Delay.in.Minutes -0.99747099 -2.693072766
#> Cleanliness Departure.Delay.in.Minutes
#> Age 1.060843423 -5.84723167
#> Flight.Distance 121.862023254 72.44479531
#> Inflight.wifi.service 0.231134995 -0.88324209
#> Departure.Arrival.time.convenient 0.028693693 0.04596669
#> Ease.of.Online.booking 0.029722489 -0.33550176
#> Gate.location -0.006731878 0.26945928
#> Food.and.drink 1.147221679 -1.51930243
#> Online.boarding 0.586989474 -0.95234252
#> Seat.comfort 1.174206912 -1.37356636
#> Inflight.entertainment 1.209979192 -1.40699656
#> On.board.service 0.208328484 -1.54555147
#> Leg.room.service 0.166394388 0.71881087
#> Baggage.handling 0.148441242 -0.25584146
#> Checkin.service 0.297935686 -0.87133809
#> Inflight.service 0.137125679 -2.43998579
#> Cleanliness 1.721852736 -0.69197344
#> Departure.Delay.in.Minutes -0.691973437 1452.88564078
#> Arrival.Delay.in.Minutes -0.800992408 1424.14948547
#> Arrival.Delay.in.Minutes
#> Age -7.10419069
#> Flight.Distance -93.62197729
#> Inflight.wifi.service -0.98123157
#> Departure.Arrival.time.convenient -0.05100821
#> Ease.of.Online.booking -0.43220849
#> Gate.location 0.25429570
#> Food.and.drink -1.67324380
#> Online.boarding -1.14618461
#> Seat.comfort -1.52608813
#> Inflight.entertainment -1.58385119
#> On.board.service -1.75622960
#> Leg.room.service 0.60287446
#> Baggage.handling -0.39039828
#> Checkin.service -0.99747099
#> Inflight.service -2.69307277
#> Cleanliness -0.80099241
#> Departure.Delay.in.Minutes 1424.14948547
#> Arrival.Delay.in.Minutes 1497.58799016
Principal Component Analysis (PCA) merupakan salah satu metode untuk mereduksi dimensi dengan cara merangkum informasi (variance) dari variabel-variabel yang ada menjadi dimensi-dimensi baru yang disebut principal component (PC).
# Preview the cleaned data before choosing the PCA input columns.
airplane %>% head()
#> Gender Customer.Type Age Type.of.Travel Class Flight.Distance
#> 1 Male Loyal Customer 13 Personal Travel Eco Plus 460
#> 2 Male disloyal Customer 25 Business travel Business 235
#> 3 Female Loyal Customer 26 Business travel Business 1142
#> 4 Female Loyal Customer 25 Business travel Business 562
#> 5 Male Loyal Customer 61 Business travel Business 214
#> 6 Female Loyal Customer 26 Personal Travel Eco 1180
#> Inflight.wifi.service Departure.Arrival.time.convenient
#> 1 3 4
#> 2 3 2
#> 3 2 2
#> 4 2 5
#> 5 3 3
#> 6 3 4
#> Ease.of.Online.booking Gate.location Food.and.drink Online.boarding
#> 1 3 1 5 3
#> 2 3 3 1 3
#> 3 2 2 5 5
#> 4 5 5 2 2
#> 5 3 3 4 5
#> 6 2 1 1 2
#> Seat.comfort Inflight.entertainment On.board.service Leg.room.service
#> 1 5 5 4 3
#> 2 1 1 1 5
#> 3 5 5 4 3
#> 4 2 2 2 5
#> 5 5 3 3 4
#> 6 1 1 3 4
#> Baggage.handling Checkin.service Inflight.service Cleanliness
#> 1 4 4 5 5
#> 2 3 1 4 1
#> 3 4 4 4 5
#> 4 3 1 4 2
#> 5 4 3 3 3
#> 6 4 4 4 1
#> Departure.Delay.in.Minutes Arrival.Delay.in.Minutes satisfaction
#> 1 25 18 neutral or dissatisfied
#> 2 1 6 neutral or dissatisfied
#> 3 0 0 satisfied
#> 4 11 9 neutral or dissatisfied
#> 5 0 0 satisfied
#> 6 0 0 neutral or dissatisfied
# Build the PCA input: keep only the numeric columns.
# This drops the character columns (Gender, Customer.Type, Type.of.Travel,
# Class, satisfaction) by type instead of by hard-coded position, so the
# selection stays correct if columns are added, removed or reordered upstream.
df <- airplane %>%
  select(where(is.numeric))
head(df, n = 3)
#> Age Flight.Distance Inflight.wifi.service Departure.Arrival.time.convenient
#> 1 13 460 3 4
#> 2 25 235 3 2
#> 3 26 1142 2 2
#> Ease.of.Online.booking Gate.location Food.and.drink Online.boarding
#> 1 3 1 5 3
#> 2 3 3 1 3
#> 3 2 2 5 5
#> Seat.comfort Inflight.entertainment On.board.service Leg.room.service
#> 1 5 5 4 3
#> 2 1 1 1 5
#> 3 5 5 4 3
#> Baggage.handling Checkin.service Inflight.service Cleanliness
#> 1 4 4 5 5
#> 2 3 1 4 1
#> 3 4 4 4 5
#> Departure.Delay.in.Minutes Arrival.Delay.in.Minutes
#> 1 25 18
#> 2 1 6
#> 3 0 0
# Check the column types of the PCA input.
df %>% glimpse()
#> Rows: 103,594
#> Columns: 18
#> $ Age <int> 13, 25, 26, 25, 61, 26, 47, 52, 41,…
#> $ Flight.Distance <int> 460, 235, 1142, 562, 214, 1180, 127…
#> $ Inflight.wifi.service <int> 3, 3, 2, 2, 3, 3, 2, 4, 1, 3, 4, 2,…
#> $ Departure.Arrival.time.convenient <int> 4, 2, 2, 5, 3, 4, 4, 3, 2, 3, 5, 4,…
#> $ Ease.of.Online.booking <int> 3, 3, 2, 5, 3, 2, 2, 4, 2, 3, 5, 2,…
#> $ Gate.location <int> 1, 3, 2, 5, 3, 1, 3, 4, 2, 4, 4, 2,…
#> $ Food.and.drink <int> 5, 1, 5, 2, 4, 1, 2, 5, 4, 2, 2, 1,…
#> $ Online.boarding <int> 3, 3, 5, 2, 5, 2, 2, 5, 3, 3, 5, 2,…
#> $ Seat.comfort <int> 5, 1, 5, 2, 5, 1, 2, 5, 3, 3, 2, 1,…
#> $ Inflight.entertainment <int> 5, 1, 5, 2, 3, 1, 2, 5, 1, 2, 2, 1,…
#> $ On.board.service <int> 4, 1, 4, 2, 3, 3, 3, 5, 1, 2, 3, 1,…
#> $ Leg.room.service <int> 3, 5, 3, 5, 4, 4, 3, 5, 2, 3, 3, 2,…
#> $ Baggage.handling <int> 4, 3, 4, 3, 4, 4, 4, 5, 1, 4, 5, 5,…
#> $ Checkin.service <int> 4, 1, 4, 1, 3, 4, 3, 4, 4, 4, 3, 5,…
#> $ Inflight.service <int> 5, 4, 4, 4, 3, 4, 5, 5, 1, 3, 5, 5,…
#> $ Cleanliness <int> 5, 1, 5, 2, 3, 1, 2, 4, 2, 2, 2, 1,…
#> $ Departure.Delay.in.Minutes <int> 25, 1, 0, 11, 0, 0, 9, 4, 0, 0, 0, …
#> $ Arrival.Delay.in.Minutes <dbl> 18, 6, 0, 9, 0, 0, 23, 0, 0, 0, 0, …
# Number of rows and columns of the PCA input.
df %>% dim()
#> [1] 103594 18
# Standardize every column, then run PCA on the standardized matrix.
# `df_pc$x` holds the observations expressed in principal-component
# coordinates; show its first three rows.
df_pc <- prcomp(scale(df))
head(df_pc$x, n = 3)
#> PC1 PC2 PC3 PC4 PC5 PC6 PC7
#> [1,] 2.106698 -1.236612 -0.2322703 -0.3423958 1.91270733 -0.2153185 1.4493111
#> [2,] -3.322792 1.241931 1.2603804 0.5424145 0.08913361 -2.1738239 0.1840568
#> [3,] 2.128788 -1.926245 -0.9123639 0.4258381 0.39599178 -0.2915697 1.0553938
#> PC8 PC9 PC10 PC11 PC12 PC13
#> [1,] -0.2875312 -0.4884377 -1.3390422677 0.1302104 -0.1410566 0.60010621
#> [2,] 0.5860853 1.2514371 0.2195892726 1.0153447 0.6478209 0.81200696
#> [3,] -0.3578300 -0.5148965 0.0008271163 -0.1700469 0.4501204 -0.01314923
#> PC14 PC15 PC16 PC17 PC18
#> [1,] -0.3891295 0.4518179 -0.4413054 0.05565799 -0.136282705
#> [2,] 0.4108619 -0.2428415 0.2189309 0.17851657 0.081910451
#> [3,] 0.5884105 -0.2076590 -0.1160029 -0.19549608 -0.001516704
# The score matrix has the same shape as the input data.
df_pc$x %>% dim()
#> [1] 103594 18
# Step 1: compute the variance-covariance matrix, which carries the
# information contained in the original data. It is computed on the
# standardized data, so every diagonal entry (variance) equals 1.
var_cov <- var(scale(df))
var_cov
#> Age Flight.Distance
#> Age 1.000000000 0.099837593
#> Flight.Distance 0.099837593 1.000000000
#> Inflight.wifi.service 0.017469851 0.007050175
#> Departure.Arrival.time.convenient 0.038037603 -0.019908273
#> Ease.of.Online.booking 0.024461455 0.065696835
#> Gate.location -0.001557918 0.004732489
#> Food.and.drink 0.022920172 0.056956659
#> Online.boarding 0.208681236 0.215190564
#> Seat.comfort 0.160301708 0.157516814
#> Inflight.entertainment 0.076379730 0.128644538
#> On.board.service 0.057123128 0.109540031
#> Leg.room.service 0.040497716 0.133839441
#> Baggage.handling -0.047619235 0.063222445
#> Checkin.service 0.035003112 0.073223531
#> Inflight.service -0.049899448 0.057430450
#> Cleanliness 0.053493252 0.093120600
#> Departure.Delay.in.Minutes -0.010150333 0.001905754
#> Arrival.Delay.in.Minutes -0.012146864 -0.002425811
#> Inflight.wifi.service
#> Age 0.017469851
#> Flight.Distance 0.007050175
#> Inflight.wifi.service 1.000000000
#> Departure.Arrival.time.convenient 0.343758147
#> Ease.of.Online.booking 0.715848109
#> Gate.location 0.336126779
#> Food.and.drink 0.134602736
#> Online.boarding 0.457002102
#> Seat.comfort 0.122616793
#> Inflight.entertainment 0.209513168
#> On.board.service 0.121483616
#> Leg.room.service 0.160484621
#> Baggage.handling 0.121059921
#> Checkin.service 0.043178243
#> Inflight.service 0.110626393
#> Cleanliness 0.132651868
#> Departure.Delay.in.Minutes -0.017450574
#> Arrival.Delay.in.Minutes -0.019095059
#> Departure.Arrival.time.convenient
#> Age 0.0380376033
#> Flight.Distance -0.0199082730
#> Inflight.wifi.service 0.3437581468
#> Departure.Arrival.time.convenient 1.0000000000
#> Ease.of.Online.booking 0.4370210571
#> Gate.location 0.4446005692
#> Food.and.drink 0.0051890995
#> Online.boarding 0.0699899368
#> Seat.comfort 0.0114156259
#> Inflight.entertainment -0.0046828402
#> On.board.service 0.0686043168
#> Leg.room.service 0.0124609961
#> Baggage.handling 0.0719005755
#> Checkin.service 0.0933289055
#> Inflight.service 0.0732271988
#> Cleanliness 0.0143368018
#> Departure.Delay.in.Minutes 0.0007906629
#> Arrival.Delay.in.Minutes -0.0008641871
#> Ease.of.Online.booking Gate.location
#> Age 0.024461455 -0.001557918
#> Flight.Distance 0.065696835 0.004732489
#> Inflight.wifi.service 0.715848109 0.336126779
#> Departure.Arrival.time.convenient 0.437021057 0.444600569
#> Ease.of.Online.booking 1.000000000 0.458745522
#> Gate.location 0.458745522 1.000000000
#> Food.and.drink 0.031940337 -0.001170361
#> Online.boarding 0.404093378 0.001451071
#> Seat.comfort 0.030021236 0.003383340
#> Inflight.entertainment 0.047184630 0.003564031
#> On.board.service 0.038758791 -0.028532444
#> Leg.room.service 0.107431439 -0.005867883
#> Baggage.handling 0.038851463 0.002420656
#> Checkin.service 0.010957101 -0.035450773
#> Inflight.service 0.035330326 0.001741645
#> Cleanliness 0.016191612 -0.004015149
#> Departure.Delay.in.Minutes -0.006291903 0.005532748
#> Arrival.Delay.in.Minutes -0.007983623 0.005142879
#> Food.and.drink Online.boarding Seat.comfort
#> Age 0.022920172 0.208681236 0.16030171
#> Flight.Distance 0.056956659 0.215190564 0.15751681
#> Inflight.wifi.service 0.134602736 0.457002102 0.12261679
#> Departure.Arrival.time.convenient 0.005189100 0.069989937 0.01141563
#> Ease.of.Online.booking 0.031940337 0.404093378 0.03002124
#> Gate.location -0.001170361 0.001451071 0.00338334
#> Food.and.drink 1.000000000 0.234491983 0.57456090
#> Online.boarding 0.234491983 1.000000000 0.42006703
#> Seat.comfort 0.574560901 0.420067035 1.00000000
#> Inflight.entertainment 0.622373825 0.285194465 0.61061425
#> On.board.service 0.058999288 0.155344827 0.13203032
#> Leg.room.service 0.032414585 0.123780411 0.10544743
#> Baggage.handling 0.034811481 0.083298686 0.07455283
#> Checkin.service 0.087055295 0.204208411 0.19154468
#> Inflight.service 0.034077210 0.074390153 0.06919311
#> Cleanliness 0.657647654 0.331498130 0.67847850
#> Departure.Delay.in.Minutes -0.029982818 -0.018515107 -0.02732269
#> Arrival.Delay.in.Minutes -0.032524227 -0.021948617 -0.02990012
#> Inflight.entertainment On.board.service
#> Age 0.076379730 0.05712313
#> Flight.Distance 0.128644538 0.10954003
#> Inflight.wifi.service 0.209513168 0.12148362
#> Departure.Arrival.time.convenient -0.004682840 0.06860432
#> Ease.of.Online.booking 0.047184630 0.03875879
#> Gate.location 0.003564031 -0.02853244
#> Food.and.drink 0.622373825 0.05899929
#> Online.boarding 0.285194465 0.15534483
#> Seat.comfort 0.610614249 0.13203032
#> Inflight.entertainment 1.000000000 0.42035247
#> On.board.service 0.420352471 1.00000000
#> Leg.room.service 0.299850464 0.35565702
#> Baggage.handling 0.378360540 0.51925213
#> Checkin.service 0.120812126 0.24385229
#> Inflight.service 0.405247126 0.55072523
#> Cleanliness 0.691735153 0.12323636
#> Departure.Delay.in.Minutes -0.027690912 -0.03147429
#> Arrival.Delay.in.Minutes -0.030702812 -0.03522681
#> Leg.room.service Baggage.handling
#> Age 0.040497716 -0.047619235
#> Flight.Distance 0.133839441 0.063222445
#> Inflight.wifi.service 0.160484621 0.121059921
#> Departure.Arrival.time.convenient 0.012460996 0.071900575
#> Ease.of.Online.booking 0.107431439 0.038851463
#> Gate.location -0.005867883 0.002420656
#> Food.and.drink 0.032414585 0.034811481
#> Online.boarding 0.123780411 0.083298686
#> Seat.comfort 0.105447427 0.074552833
#> Inflight.entertainment 0.299850464 0.378360540
#> On.board.service 0.355657016 0.519252126
#> Leg.room.service 1.000000000 0.369673536
#> Baggage.handling 0.369673536 1.000000000
#> Checkin.service 0.153079244 0.233326115
#> Inflight.service 0.368925305 0.628944428
#> Cleanliness 0.096400653 0.095782865
#> Departure.Delay.in.Minutes 0.014336338 -0.005683115
#> Arrival.Delay.in.Minutes 0.011843226 -0.008541674
#> Checkin.service Inflight.service Cleanliness
#> Age 0.03500311 -0.049899448 0.053493252
#> Flight.Distance 0.07322353 0.057430450 0.093120600
#> Inflight.wifi.service 0.04317824 0.110626393 0.132651868
#> Departure.Arrival.time.convenient 0.09332891 0.073227199 0.014336802
#> Ease.of.Online.booking 0.01095710 0.035330326 0.016191612
#> Gate.location -0.03545077 0.001741645 -0.004015149
#> Food.and.drink 0.08705530 0.034077210 0.657647654
#> Online.boarding 0.20420841 0.074390153 0.331498130
#> Seat.comfort 0.19154468 0.069193110 0.678478499
#> Inflight.entertainment 0.12081213 0.405247126 0.691735153
#> On.board.service 0.24385229 0.550725227 0.123236364
#> Leg.room.service 0.15307924 0.368925305 0.096400653
#> Baggage.handling 0.23332611 0.628944428 0.095782865
#> Checkin.service 1.00000000 0.237255743 0.179431236
#> Inflight.service 0.23725574 1.000000000 0.088891472
#> Cleanliness 0.17943124 0.088891472 1.000000000
#> Departure.Delay.in.Minutes -0.01806527 -0.054451631 -0.013834890
#> Arrival.Delay.in.Minutes -0.02036937 -0.059195845 -0.015773723
#> Departure.Delay.in.Minutes
#> Age -0.0101503333
#> Flight.Distance 0.0019057541
#> Inflight.wifi.service -0.0174505738
#> Departure.Arrival.time.convenient 0.0007906629
#> Ease.of.Online.booking -0.0062919026
#> Gate.location 0.0055327479
#> Food.and.drink -0.0299828180
#> Online.boarding -0.0185151073
#> Seat.comfort -0.0273226860
#> Inflight.entertainment -0.0276909124
#> On.board.service -0.0314742929
#> Leg.room.service 0.0143363377
#> Baggage.handling -0.0056831152
#> Checkin.service -0.0180652723
#> Inflight.service -0.0544516308
#> Cleanliness -0.0138348898
#> Departure.Delay.in.Minutes 1.0000000000
#> Arrival.Delay.in.Minutes 0.9654809014
#> Arrival.Delay.in.Minutes
#> Age -0.0121468636
#> Flight.Distance -0.0024258113
#> Inflight.wifi.service -0.0190950592
#> Departure.Arrival.time.convenient -0.0008641871
#> Ease.of.Online.booking -0.0079836230
#> Gate.location 0.0051428789
#> Food.and.drink -0.0325242269
#> Online.boarding -0.0219486175
#> Seat.comfort -0.0299001178
#> Inflight.entertainment -0.0307028116
#> On.board.service -0.0352268123
#> Leg.room.service 0.0118432259
#> Baggage.handling -0.0085416736
#> Checkin.service -0.0203693714
#> Inflight.service -0.0591958450
#> Cleanliness -0.0157737235
#> Departure.Delay.in.Minutes 0.9654809014
#> Arrival.Delay.in.Minutes 1.0000000000
# Step 2: decompose the variance-covariance matrix into
#   - eigenvalues  (the amount of information carried by each PC)
#   - eigenvectors (how much the direction of the linear axis changes)
eig <- var_cov %>% eigen()
eig
#> eigen() decomposition
#> $values
#> [1] 3.86067850 2.36324783 2.17933427 1.96253451 1.24514248 0.96288004
#> [7] 0.92834159 0.89139099 0.69149756 0.53392241 0.47895699 0.44686863
#> [13] 0.36696178 0.32321721 0.29467345 0.25070741 0.18515330 0.03449105
#>
#> $vectors
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] -0.07120467 0.01012126 0.10267024 0.009334937 -0.525477561
#> [2,] -0.11957453 -0.01979722 0.01113523 -0.021358489 -0.487347576
#> [3,] -0.23103580 0.44967656 0.04341691 0.013757640 -0.014003609
#> [4,] -0.09918330 0.41742737 -0.02621735 0.008812775 0.193658527
#> [5,] -0.16263344 0.53649871 0.04597711 0.016076055 -0.050209995
#> [6,] -0.06405416 0.43027733 0.03357226 0.010141601 0.262293874
#> [7,] -0.30215953 -0.15740790 0.33887851 0.003791875 0.254358523
#> [8,] -0.28698476 0.15158750 0.16991676 0.002118812 -0.425962598
#> [9,] -0.34563173 -0.15305620 0.31386895 -0.006024007 -0.012032057
#> [10,] -0.42222447 -0.17727834 0.06461320 -0.030309268 0.183369287
#> [11,] -0.27455501 -0.07053181 -0.37879139 -0.028696730 -0.042136364
#> [12,] -0.22043041 -0.01409189 -0.29502943 -0.066218375 -0.126762948
#> [13,] -0.25483400 -0.05720561 -0.43517739 -0.053156830 0.099220117
#> [14,] -0.18117271 -0.04862467 -0.13163708 -0.012954580 -0.169075688
#> [15,] -0.25874930 -0.06495181 -0.44670724 -0.010270936 0.110159477
#> [16,] -0.35165900 -0.17555336 0.31212097 -0.019326223 0.170266942
#> [17,] 0.03815949 0.02858195 0.03766455 -0.703433692 0.002757910
#> [18,] 0.04078750 0.02840667 0.03878262 -0.703103717 0.005126555
#> [,6] [,7] [,8] [,9] [,10]
#> [1,] 0.3740550852 -0.423176292 0.55790719 -0.001343398 0.088751158
#> [2,] -0.1323159182 -0.358733669 -0.73221346 -0.144626050 -0.062533189
#> [3,] -0.2808146857 0.206808241 0.14905443 -0.029679852 0.013772163
#> [4,] 0.4385748056 -0.159282423 -0.08967282 -0.019287178 -0.701883140
#> [5,] -0.1864947573 0.099179398 0.01373025 -0.010366237 0.034710802
#> [6,] 0.2178734430 -0.361072072 -0.17236842 0.101241481 0.629258604
#> [7,] -0.0194266751 -0.037931382 -0.02604581 0.022168648 -0.082077631
#> [8,] -0.1591530419 0.337370889 0.08866743 -0.182309074 0.007879992
#> [9,] 0.1062458161 -0.022935449 -0.02396641 0.041523858 0.045141729
#> [10,] -0.1045695031 -0.171778033 0.07369884 -0.049803882 0.022954043
#> [11,] 0.0454428573 -0.071478047 0.09998654 -0.256758258 -0.122274770
#> [12,] -0.2458334888 -0.129607636 0.03930357 0.851546367 -0.129469682
#> [13,] -0.0071149979 -0.016855469 0.02680305 -0.198245592 0.120006288
#> [14,] 0.6127957617 0.561426546 -0.24971525 0.222833230 0.183332998
#> [15,] 0.0007333593 -0.030947241 0.03457674 -0.212436912 0.093460244
#> [16,] 0.0540713789 0.007616781 -0.04294262 0.055584253 -0.019127205
#> [17,] 0.0102996065 0.008827023 0.01003874 -0.026388623 0.002723661
#> [18,] 0.0096234482 0.010267770 0.01184920 -0.024722868 0.003793709
#> [,11] [,12] [,13] [,14] [,15]
#> [1,] -0.132087739 -0.214886848 -0.006797917 -0.050105404 0.020842557
#> [2,] -0.054722927 -0.160004642 -0.001628660 -0.083847205 -0.004566956
#> [3,] -0.002692876 -0.286489090 0.033669271 -0.379875565 -0.121698219
#> [4,] -0.117046673 0.156561236 -0.014931834 0.056262318 0.044442241
#> [5,] -0.005033891 -0.176119175 -0.019018502 -0.064952907 -0.112546123
#> [6,] 0.154273064 0.175425058 0.014055350 0.195143456 0.107290862
#> [7,] -0.044502859 -0.515638984 0.043045264 0.594375842 -0.188506063
#> [8,] -0.030042440 0.410574608 -0.037896199 0.479635901 0.270343377
#> [9,] 0.007723806 0.486880565 -0.005564194 -0.207950755 -0.667013688
#> [10,] 0.039695092 -0.103797482 -0.054177738 -0.183680730 0.105369346
#> [11,] 0.784927569 -0.015432511 0.162483405 0.047600427 -0.021537656
#> [12,] 0.013761805 0.086371214 -0.002828309 0.112086840 0.024794779
#> [13,] -0.502741712 0.069886534 0.639411739 0.035829513 -0.022302884
#> [14,] 0.034836877 -0.244083649 -0.002839250 -0.071023598 -0.019304714
#> [15,] -0.258299478 0.006014077 -0.745331855 0.048045294 -0.042695955
#> [16,] -0.022392798 0.072500451 0.011518578 -0.341885090 0.624521445
#> [17,] 0.002990317 -0.008047350 -0.022648638 0.004548643 -0.009332673
#> [18,] 0.003822737 -0.006693515 -0.018724172 0.001079697 -0.008083642
#> [,16] [,17] [,18]
#> [1,] 0.021207248 -0.053595520 0.00107385980
#> [2,] -0.058247072 -0.031811140 0.00256072411
#> [3,] -0.570961846 -0.169995441 -0.00026586369
#> [4,] -0.098460068 0.074877358 0.00081010975
#> [5,] 0.763683036 0.056622327 0.00027672243
#> [6,] -0.150113097 -0.007971532 -0.00010578489
#> [7,] -0.034351868 -0.186890193 0.00249054209
#> [8,] -0.127503475 0.117160140 0.00113223231
#> [9,] 0.047431154 -0.121339126 0.00038714179
#> [10,] -0.047292538 0.800207910 -0.00207997688
#> [11,] 0.051185874 -0.165573712 0.00094644104
#> [12,] -0.016290419 -0.044091997 0.00021992106
#> [13,] 0.045145242 -0.068420366 -0.00046253290
#> [14,] -0.005860419 0.127034401 0.00006791429
#> [15,] 0.013993199 -0.185496219 0.00370216162
#> [16,] 0.166251083 -0.412322135 -0.00017979624
#> [17,] -0.001240937 -0.005618109 -0.70694104030
#> [18,] -0.001389102 -0.001657268 0.70724746617
# Step 3: transform the original data by projecting the standardized data
# (103594 x 18) onto the eigenvector matrix (18 x 18).
df_pc_manual <- df %>%
  scale() %>%
  `%*%`(eig$vectors)
head(df_pc_manual, n = 3)
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7]
#> [1,] -2.106698 -1.236612 0.2322703 -0.3423958 1.91270733 -0.2153185 1.4493111
#> [2,] 3.322792 1.241931 -1.2603804 0.5424145 0.08913361 -2.1738239 0.1840568
#> [3,] -2.128788 -1.926245 0.9123639 0.4258381 0.39599178 -0.2915697 1.0553938
#> [,8] [,9] [,10] [,11] [,12] [,13]
#> [1,] -0.2875312 -0.4884377 -1.3390422677 -0.1302104 -0.1410566 -0.60010621
#> [2,] 0.5860853 1.2514371 0.2195892726 -1.0153447 0.6478209 -0.81200696
#> [3,] -0.3578300 -0.5148965 0.0008271163 0.1700469 0.4501204 0.01314923
#> [,14] [,15] [,16] [,17] [,18]
#> [1,] -0.3891295 -0.4518179 0.4413054 -0.05565799 -0.136282705
#> [2,] 0.4108619 0.2428415 -0.2189309 -0.17851657 0.081910451
#> [3,] 0.5884105 0.2076590 0.1160029 0.19549608 -0.001516704
# Cross-check against prcomp(): the manual scores match df_pc$x up to the sign
# of individual columns (eigenvector signs are arbitrary).
# head(df_pc$x, 3)
dim(df_pc_manual)
#> [1] 103594 18
# Correlation heatmaps: original variables first, then the PC scores.
ggcorr(df, label = TRUE)
ggcorr(df_pc$x, label = TRUE)
- Karena antar PC tidak saling berkorelasi, maka setiap PC merangkum informasi yang berbeda dengan PC lainnya (jumlah informasi yang dirangkum oleh PC1 > PC2 > PC3 > PC4 dst.)
- Tahapan reduksi dimensi: user menentukan akan menggunakan berapa PC berdasarkan kumulatif besar informasi yang dirangkum oleh setiap PC
# Standard deviation, proportion of variance and cumulative proportion per PC.
summary(df_pc)
#> Importance of components:
#> PC1 PC2 PC3 PC4 PC5 PC6 PC7
#> Standard deviation 1.9649 1.5373 1.4763 1.4009 1.11586 0.98126 0.96350
#> Proportion of Variance 0.2145 0.1313 0.1211 0.1090 0.06917 0.05349 0.05157
#> Cumulative Proportion 0.2145 0.3458 0.4668 0.5759 0.64505 0.69855 0.75012
#> PC8 PC9 PC10 PC11 PC12 PC13 PC14
#> Standard deviation 0.94414 0.83156 0.73070 0.69207 0.66848 0.60577 0.56852
#> Proportion of Variance 0.04952 0.03842 0.02966 0.02661 0.02483 0.02039 0.01796
#> Cumulative Proportion 0.79964 0.83806 0.86772 0.89433 0.91916 0.93954 0.95750
#> PC15 PC16 PC17 PC18
#> Standard deviation 0.54284 0.50071 0.43029 0.18572
#> Proportion of Variance 0.01637 0.01393 0.01029 0.00192
#> Cumulative Proportion 0.97387 0.98780 0.99808 1.00000
Setelah melakukan PCA (linier) pada data df, user memutuskan untuk mereduksi jumlah kolom yang awalnya 18 kolom menjadi 11 PC saja (PC1 s.d. PC11), karena 11 PC tersebut sudah merangkum 89.43% informasi pada data awal.
Keterangan:
- Standard deviation: standar deviasi dari setiap PC. Jika standar deviasi dikuadratkan, maka akan diperoleh variansi (eigen value)
- Proportion of Variance: persentase/proporsi informasi yang dirangkum oleh setiap PC
- Cumulative Proportion: persentase/proporsi informasi kumulatif
# Plot the variance captured by each principal component.
plot(df_pc)
Terdapat beberapa nilai yang dihasilkan oleh PCA, yaitu:
$sdev: standar deviasi dari setiap PC. Jika standar deviasi dikuadratkan, maka akan diperoleh variansi (eigen value)
df_pc$sdev
#> [1] 1.9648609 1.5372859 1.4762568 1.4009049 1.1158595 0.9812645 0.9635048
#> [8] 0.9441350 0.8315633 0.7306999 0.6920672 0.6684823 0.6057737 0.5685219
#> [15] 0.5428383 0.5007069 0.4302944 0.1857177
$rotation: matriks transformasi berisi eigen vector untuk setiap PC
df_pc$rotation
#> PC1 PC2 PC3
#> Age 0.07120467 0.01012126 -0.10267024
#> Flight.Distance 0.11957453 -0.01979722 -0.01113523
#> Inflight.wifi.service 0.23103580 0.44967656 -0.04341691
#> Departure.Arrival.time.convenient 0.09918330 0.41742737 0.02621735
#> Ease.of.Online.booking 0.16263344 0.53649871 -0.04597711
#> Gate.location 0.06405416 0.43027733 -0.03357226
#> Food.and.drink 0.30215953 -0.15740790 -0.33887851
#> Online.boarding 0.28698476 0.15158750 -0.16991676
#> Seat.comfort 0.34563173 -0.15305620 -0.31386895
#> Inflight.entertainment 0.42222447 -0.17727834 -0.06461320
#> On.board.service 0.27455501 -0.07053181 0.37879139
#> Leg.room.service 0.22043041 -0.01409189 0.29502943
#> Baggage.handling 0.25483400 -0.05720561 0.43517739
#> Checkin.service 0.18117271 -0.04862467 0.13163708
#> Inflight.service 0.25874930 -0.06495181 0.44670724
#> Cleanliness 0.35165900 -0.17555336 -0.31212097
#> Departure.Delay.in.Minutes -0.03815949 0.02858195 -0.03766455
#> Arrival.Delay.in.Minutes -0.04078750 0.02840667 -0.03878262
#> PC4 PC5 PC6
#> Age 0.009334937 -0.525477561 0.3740550852
#> Flight.Distance -0.021358489 -0.487347576 -0.1323159182
#> Inflight.wifi.service 0.013757640 -0.014003609 -0.2808146857
#> Departure.Arrival.time.convenient 0.008812775 0.193658527 0.4385748056
#> Ease.of.Online.booking 0.016076055 -0.050209995 -0.1864947573
#> Gate.location 0.010141601 0.262293874 0.2178734430
#> Food.and.drink 0.003791875 0.254358523 -0.0194266751
#> Online.boarding 0.002118812 -0.425962598 -0.1591530419
#> Seat.comfort -0.006024007 -0.012032057 0.1062458161
#> Inflight.entertainment -0.030309268 0.183369287 -0.1045695031
#> On.board.service -0.028696730 -0.042136364 0.0454428573
#> Leg.room.service -0.066218375 -0.126762948 -0.2458334888
#> Baggage.handling -0.053156830 0.099220117 -0.0071149979
#> Checkin.service -0.012954580 -0.169075688 0.6127957617
#> Inflight.service -0.010270936 0.110159477 0.0007333593
#> Cleanliness -0.019326223 0.170266942 0.0540713789
#> Departure.Delay.in.Minutes -0.703433692 0.002757910 0.0102996065
#> Arrival.Delay.in.Minutes -0.703103717 0.005126555 0.0096234482
#> PC7 PC8 PC9
#> Age -0.423176292 0.55790719 -0.001343398
#> Flight.Distance -0.358733669 -0.73221346 -0.144626050
#> Inflight.wifi.service 0.206808241 0.14905443 -0.029679852
#> Departure.Arrival.time.convenient -0.159282423 -0.08967282 -0.019287178
#> Ease.of.Online.booking 0.099179398 0.01373025 -0.010366237
#> Gate.location -0.361072072 -0.17236842 0.101241481
#> Food.and.drink -0.037931382 -0.02604581 0.022168648
#> Online.boarding 0.337370889 0.08866743 -0.182309074
#> Seat.comfort -0.022935449 -0.02396641 0.041523858
#> Inflight.entertainment -0.171778033 0.07369884 -0.049803882
#> On.board.service -0.071478047 0.09998654 -0.256758258
#> Leg.room.service -0.129607636 0.03930357 0.851546367
#> Baggage.handling -0.016855469 0.02680305 -0.198245592
#> Checkin.service 0.561426546 -0.24971525 0.222833230
#> Inflight.service -0.030947241 0.03457674 -0.212436912
#> Cleanliness 0.007616781 -0.04294262 0.055584253
#> Departure.Delay.in.Minutes 0.008827023 0.01003874 -0.026388623
#> Arrival.Delay.in.Minutes 0.010267770 0.01184920 -0.024722868
#> PC10 PC11 PC12
#> Age 0.088751158 0.132087739 -0.214886848
#> Flight.Distance -0.062533189 0.054722927 -0.160004642
#> Inflight.wifi.service 0.013772163 0.002692876 -0.286489090
#> Departure.Arrival.time.convenient -0.701883140 0.117046673 0.156561236
#> Ease.of.Online.booking 0.034710802 0.005033891 -0.176119175
#> Gate.location 0.629258604 -0.154273064 0.175425058
#> Food.and.drink -0.082077631 0.044502859 -0.515638984
#> Online.boarding 0.007879992 0.030042440 0.410574608
#> Seat.comfort 0.045141729 -0.007723806 0.486880565
#> Inflight.entertainment 0.022954043 -0.039695092 -0.103797482
#> On.board.service -0.122274770 -0.784927569 -0.015432511
#> Leg.room.service -0.129469682 -0.013761805 0.086371214
#> Baggage.handling 0.120006288 0.502741712 0.069886534
#> Checkin.service 0.183332998 -0.034836877 -0.244083649
#> Inflight.service 0.093460244 0.258299478 0.006014077
#> Cleanliness -0.019127205 0.022392798 0.072500451
#> Departure.Delay.in.Minutes 0.002723661 -0.002990317 -0.008047350
#> Arrival.Delay.in.Minutes 0.003793709 -0.003822737 -0.006693515
#> PC13 PC14 PC15
#> Age 0.006797917 -0.050105404 -0.020842557
#> Flight.Distance 0.001628660 -0.083847205 0.004566956
#> Inflight.wifi.service -0.033669271 -0.379875565 0.121698219
#> Departure.Arrival.time.convenient 0.014931834 0.056262318 -0.044442241
#> Ease.of.Online.booking 0.019018502 -0.064952907 0.112546123
#> Gate.location -0.014055350 0.195143456 -0.107290862
#> Food.and.drink -0.043045264 0.594375842 0.188506063
#> Online.boarding 0.037896199 0.479635901 -0.270343377
#> Seat.comfort 0.005564194 -0.207950755 0.667013688
#> Inflight.entertainment 0.054177738 -0.183680730 -0.105369346
#> On.board.service -0.162483405 0.047600427 0.021537656
#> Leg.room.service 0.002828309 0.112086840 -0.024794779
#> Baggage.handling -0.639411739 0.035829513 0.022302884
#> Checkin.service 0.002839250 -0.071023598 0.019304714
#> Inflight.service 0.745331855 0.048045294 0.042695955
#> Cleanliness -0.011518578 -0.341885090 -0.624521445
#> Departure.Delay.in.Minutes 0.022648638 0.004548643 0.009332673
#> Arrival.Delay.in.Minutes 0.018724172 0.001079697 0.008083642
#> PC16 PC17 PC18
#> Age -0.021207248 0.053595520 0.00107385980
#> Flight.Distance 0.058247072 0.031811140 0.00256072411
#> Inflight.wifi.service 0.570961846 0.169995441 -0.00026586369
#> Departure.Arrival.time.convenient 0.098460068 -0.074877358 0.00081010975
#> Ease.of.Online.booking -0.763683036 -0.056622327 0.00027672243
#> Gate.location 0.150113097 0.007971532 -0.00010578489
#> Food.and.drink 0.034351868 0.186890193 0.00249054209
#> Online.boarding 0.127503475 -0.117160140 0.00113223231
#> Seat.comfort -0.047431154 0.121339126 0.00038714179
#> Inflight.entertainment 0.047292538 -0.800207910 -0.00207997688
#> On.board.service -0.051185874 0.165573712 0.00094644104
#> Leg.room.service 0.016290419 0.044091997 0.00021992106
#> Baggage.handling -0.045145242 0.068420366 -0.00046253290
#> Checkin.service 0.005860419 -0.127034401 0.00006791429
#> Inflight.service -0.013993199 0.185496219 0.00370216162
#> Cleanliness -0.166251083 0.412322135 -0.00017979624
#> Departure.Delay.in.Minutes 0.001240937 0.005618109 -0.70694104030
#> Arrival.Delay.in.Minutes 0.001389102 0.001657268 0.70724746617
$x: hasil transformasi data berupa nilai pada setiap PC
head(df_pc$x)
#> PC1 PC2 PC3 PC4 PC5 PC6
#> [1,] 2.1066977 -1.2366121 -0.2322703 -0.3423958 1.91270733 -0.21531852
#> [2,] -3.3227916 1.2419314 1.2603804 0.5424145 0.08913361 -2.17382386
#> [3,] 2.1287884 -1.9262455 -0.9123639 0.4258381 0.39599178 -0.29156972
#> [4,] -1.8683588 2.4997439 0.8505465 0.2791634 1.39317601 -0.88331381
#> [5,] 0.7822565 0.2378688 -0.9405956 0.5658857 -0.83136070 0.19311936
#> [6,] -2.5926326 0.3342038 2.5400675 0.5629496 -0.49924921 -0.08667536
#> PC7 PC8 PC9 PC10 PC11 PC12
#> [1,] 1.44931115 -0.2875312 -0.4884377 -1.3390422677 0.1302104 -0.1410566
#> [2,] 0.18405680 0.5860853 1.2514371 0.2195892726 1.0153447 0.6478209
#> [3,] 1.05539377 -0.3578300 -0.5148965 0.0008271163 -0.1700469 0.4501204
#> [4,] -1.29735584 -0.1925489 1.3121993 -0.2818494168 0.4094772 0.7816546
#> [5,] 0.06304966 1.6385823 0.4779975 0.1605988085 0.4451393 0.6981710
#> [6,] 1.15404456 -0.4402043 0.3920498 -1.3255796236 0.5806908 -0.3735397
#> PC13 PC14 PC15 PC16 PC17 PC18
#> [1,] 0.60010621 -0.3891295 0.4518179 -0.44130536 0.055657989 -0.136282705
#> [2,] 0.81200696 0.4108619 -0.2428415 0.21893094 0.178516568 0.081910451
#> [3,] -0.01314923 0.5884105 -0.2076590 -0.11600291 -0.195496079 -0.001516704
#> [4,] 0.72934624 0.5665909 -0.1147616 -1.09025791 0.008636018 -0.045501138
#> [5,] -0.55121182 0.8205929 0.6798394 0.06279077 0.222871217 -0.003215653
#> [6,] 0.02134649 -0.3622343 0.1050978 0.50184920 0.208720428 -0.005437314
df_pc$center # centering value per variable (all ~0 here, so the data given to prcomp() was already centered)
#> Age Flight.Distance
#> -0.000000000000000012743856 0.000000000000000010592272
#> Inflight.wifi.service Departure.Arrival.time.convenient
#> -0.000000000000000031249873 -0.000000000000000095024546
#> Ease.of.Online.booking Gate.location
#> -0.000000000000000102860190 0.000000000000000182280263
#> Food.and.drink Online.boarding
#> -0.000000000000000156509252 0.000000000000000116666978
#> Seat.comfort Inflight.entertainment
#> 0.000000000000000042212353 -0.000000000000000103036486
#> On.board.service Leg.room.service
#> -0.000000000000000086993048 -0.000000000000000165991171
#> Baggage.handling Checkin.service
#> 0.000000000000000020190939 0.000000000000000052605756
#> Inflight.service Cleanliness
#> 0.000000000000000135449695 0.000000000000000001535219
#> Departure.Delay.in.Minutes Arrival.Delay.in.Minutes
#> 0.000000000000000028245630 0.000000000000000033673075
df_pc$scale # scaling value (standard deviation) per variable
#> Age Flight.Distance
#> 15.113125 997.297235
#> Inflight.wifi.service Departure.Arrival.time.convenient
#> 1.327866 1.525233
#> Ease.of.Online.booking Gate.location
#> 1.398934 1.277723
#> Food.and.drink Online.boarding
#> 1.329401 1.349433
#> Seat.comfort Inflight.entertainment
#> 1.318896 1.333030
#> On.board.service Leg.room.service
#> 1.288284 1.315409
#> Baggage.handling Checkin.service
#> 1.181051 1.265396
#> Inflight.service Cleanliness
#> 1.175603 1.312194
#> Departure.Delay.in.Minutes Arrival.Delay.in.Minutes
#> 38.116737 38.698682
Mengambil hasil PCA dan menggabungkannya kembali dengan variabel kategorik/factor pada data awal
Ambil hasil transformasi data (PCA)
# Keep the scores of the first 11 principal components as a data frame.
pc_keep <- as.data.frame(df_pc$x[, 1:11])
head(pc_keep)
#> PC1 PC2 PC3 PC4 PC5 PC6
#> 1 2.1066977 -1.2366121 -0.2322703 -0.3423958 1.91270733 -0.21531852
#> 2 -3.3227916 1.2419314 1.2603804 0.5424145 0.08913361 -2.17382386
#> 3 2.1287884 -1.9262455 -0.9123639 0.4258381 0.39599178 -0.29156972
#> 4 -1.8683588 2.4997439 0.8505465 0.2791634 1.39317601 -0.88331381
#> 5 0.7822565 0.2378688 -0.9405956 0.5658857 -0.83136070 0.19311936
#> 6 -2.5926326 0.3342038 2.5400675 0.5629496 -0.49924921 -0.08667536
#> PC7 PC8 PC9 PC10 PC11
#> 1 1.44931115 -0.2875312 -0.4884377 -1.3390422677 0.1302104
#> 2 0.18405680 0.5860853 1.2514371 0.2195892726 1.0153447
#> 3 1.05539377 -0.3578300 -0.5148965 0.0008271163 -0.1700469
#> 4 -1.29735584 -0.1925489 1.3121993 -0.2818494168 0.4094772
#> 5 0.06304966 1.6385823 0.4779975 0.1605988085 0.4451393
#> 6 1.15404456 -0.4402043 0.3920498 -1.3255796236 0.5806908
Gabungkan dengan data semula
# Names of the numeric columns that were fed into the PCA.
mycols <- colnames(df)
# Positions of those columns in `airplane` (kept for reference).
colsnumber <- match(mycols, names(airplane))
# Replace the original PCA input columns with the retained PC scores.
# all_of() makes the use of an external character vector explicit and fails
# loudly if any of the expected columns is missing.
airplane_pca <- airplane %>%
  select(-all_of(mycols)) %>% # drop the 18 columns that were used in the PCA
  bind_cols(pc_keep) # append the PC scores to the remaining columns
head(airplane_pca)
#> Gender Customer.Type Type.of.Travel Class satisfaction
#> 1 Male Loyal Customer Personal Travel Eco Plus neutral or dissatisfied
#> 2 Male disloyal Customer Business travel Business neutral or dissatisfied
#> 3 Female Loyal Customer Business travel Business satisfied
#> 4 Female Loyal Customer Business travel Business neutral or dissatisfied
#> 5 Male Loyal Customer Business travel Business satisfied
#> 6 Female Loyal Customer Personal Travel Eco neutral or dissatisfied
#> PC1 PC2 PC3 PC4 PC5 PC6
#> 1 2.1066977 -1.2366121 -0.2322703 -0.3423958 1.91270733 -0.21531852
#> 2 -3.3227916 1.2419314 1.2603804 0.5424145 0.08913361 -2.17382386
#> 3 2.1287884 -1.9262455 -0.9123639 0.4258381 0.39599178 -0.29156972
#> 4 -1.8683588 2.4997439 0.8505465 0.2791634 1.39317601 -0.88331381
#> 5 0.7822565 0.2378688 -0.9405956 0.5658857 -0.83136070 0.19311936
#> 6 -2.5926326 0.3342038 2.5400675 0.5629496 -0.49924921 -0.08667536
#> PC7 PC8 PC9 PC10 PC11
#> 1 1.44931115 -0.2875312 -0.4884377 -1.3390422677 0.1302104
#> 2 0.18405680 0.5860853 1.2514371 0.2195892726 1.0153447
#> 3 1.05539377 -0.3578300 -0.5148965 0.0008271163 -0.1700469
#> 4 -1.29735584 -0.1925489 1.3121993 -0.2818494168 0.4094772
#> 5 0.06304966 1.6385823 0.4779975 0.1605988085 0.4451393
#> 6 1.15404456 -0.4402043 0.3920498 -1.3255796236 0.5806908
# Correlation heatmaps: original variables first, then the PC scores.
ggcorr(df, label = TRUE)
ggcorr(df_pc$x, label = TRUE)
> Untuk melihat variabel yang paling berkontribusi/yang paling banyak dirangkum informasinya oleh setiap PC, lihat nilai absolut dari eigen vector/matriks rotasi
# Loadings of every variable on the first principal component.
df_pc$rotation[, "PC1"]
#> Age Flight.Distance
#> 0.07120467 0.11957453
#> Inflight.wifi.service Departure.Arrival.time.convenient
#> 0.23103580 0.09918330
#> Ease.of.Online.booking Gate.location
#> 0.16263344 0.06405416
#> Food.and.drink Online.boarding
#> 0.30215953 0.28698476
#> Seat.comfort Inflight.entertainment
#> 0.34563173 0.42222447
#> On.board.service Leg.room.service
#> 0.27455501 0.22043041
#> Baggage.handling Checkin.service
#> 0.25483400 0.18117271
#> Inflight.service Cleanliness
#> 0.25874930 0.35165900
#> Departure.Delay.in.Minutes Arrival.Delay.in.Minutes
#> -0.03815949 -0.04078750
Variabel yang paling berkontribusi (paling banyak dirangkum) oleh PC1 adalah Inflight.entertainment dengan loading 0.422. Nilai ini adalah koefisien eigen vector, bukan persentase; kontribusinya terhadap PC1 adalah 0.422² ≈ 17.8%.
Selain mereduksi dimensi, PCA dapat digunakan untuk memvisualisasikan high dimensional data menggunakan biplot
Kita akan membuat visualisasi high dimensional data dari 100 observasi pertama data df
# Work on the first 100 observations only so the biplot stays readable.
airplane_small <- df %>% head(n = 100)
melakukan PCA
# Standardize the subset with scale(), then run prcomp() on the result.
airplane_pca_small <- prcomp(scale(airplane_small))
summary(airplane_pca_small)
#> Importance of components:
#> PC1 PC2 PC3 PC4 PC5 PC6 PC7
#> Standard deviation 1.9679 1.7177 1.5294 1.4020 1.10002 0.9995 0.96817
#> Proportion of Variance 0.2152 0.1639 0.1299 0.1092 0.06723 0.0555 0.05208
#> Cumulative Proportion 0.2152 0.3791 0.5090 0.6182 0.68544 0.7409 0.79302
#> PC8 PC9 PC10 PC11 PC12 PC13 PC14
#> Standard deviation 0.87960 0.83053 0.67770 0.64973 0.60039 0.54532 0.49928
#> Proportion of Variance 0.04298 0.03832 0.02552 0.02345 0.02003 0.01652 0.01385
#> Cumulative Proportion 0.83600 0.87432 0.89984 0.92329 0.94332 0.95984 0.97369
#> PC15 PC16 PC17 PC18
#> Standard deviation 0.40975 0.38612 0.31082 0.24505
#> Proportion of Variance 0.00933 0.00828 0.00537 0.00334
#> Cumulative Proportion 0.98301 0.99130 0.99666 1.00000
membuat biplot
# Biplot of the 100-observation PCA: observations and variable loadings together.
biplot(airplane_pca_small)
5 observasi yang merupakan outlier! - - - - -
FactoMineR
FactoMineR adalah package yang dibuat untuk melakukan eksplorasi data secara multivariat.
# Preview the airplane data again before preparing the FactoMineR input.
head(airplane)
#> Gender Customer.Type Age Type.of.Travel Class Flight.Distance
#> 1 Male Loyal Customer 13 Personal Travel Eco Plus 460
#> 2 Male disloyal Customer 25 Business travel Business 235
#> 3 Female Loyal Customer 26 Business travel Business 1142
#> 4 Female Loyal Customer 25 Business travel Business 562
#> 5 Male Loyal Customer 61 Business travel Business 214
#> 6 Female Loyal Customer 26 Personal Travel Eco 1180
#> Inflight.wifi.service Departure.Arrival.time.convenient
#> 1 3 4
#> 2 3 2
#> 3 2 2
#> 4 2 5
#> 5 3 3
#> 6 3 4
#> Ease.of.Online.booking Gate.location Food.and.drink Online.boarding
#> 1 3 1 5 3
#> 2 3 3 1 3
#> 3 2 2 5 5
#> 4 5 5 2 2
#> 5 3 3 4 5
#> 6 2 1 1 2
#> Seat.comfort Inflight.entertainment On.board.service Leg.room.service
#> 1 5 5 4 3
#> 2 1 1 1 5
#> 3 5 5 4 3
#> 4 2 2 2 5
#> 5 5 3 3 4
#> 6 1 1 3 4
#> Baggage.handling Checkin.service Inflight.service Cleanliness
#> 1 4 4 5 5
#> 2 3 1 4 1
#> 3 4 4 4 5
#> 4 3 1 4 2
#> 5 4 3 3 3
#> 6 4 4 4 1
#> Departure.Delay.in.Minutes Arrival.Delay.in.Minutes satisfaction
#> 1 25 18 neutral or dissatisfied
#> 2 1 6 neutral or dissatisfied
#> 3 0 0 satisfied
#> 4 11 9 neutral or dissatisfied
#> 5 0 0 satisfied
#> 6 0 0 neutral or dissatisfied
# List the distinct rating values of the two columns that are converted to
# factors for the FactoMineR analysis.
unique(airplane$Inflight.service)
#> [1] 5 4 3 1 2 0
unique(airplane$Inflight.entertainment)
#> [1] 5 1 2 3 4 0
# Build the FactoMineR input: keep the integer columns, turn two rating
# columns into factors, and add the satisfaction label as a categorical column.
# NOTE: is.integer drops Arrival.Delay.in.Minutes (it is not stored as
# integer), which is why the result has 18 columns.
airplane_facto <- airplane %>%
  select(where(is.integer)) %>%
  mutate(
    # across() converts in place, so the column order is unchanged.
    across(c(Inflight.service, Inflight.entertainment), as.factor),
    satisfaction_score = airplane$satisfaction
  )
head(airplane_facto)
#> Age Flight.Distance Inflight.wifi.service Departure.Arrival.time.convenient
#> 1 13 460 3 4
#> 2 25 235 3 2
#> 3 26 1142 2 2
#> 4 25 562 2 5
#> 5 61 214 3 3
#> 6 26 1180 3 4
#> Ease.of.Online.booking Gate.location Food.and.drink Online.boarding
#> 1 3 1 5 3
#> 2 3 3 1 3
#> 3 2 2 5 5
#> 4 5 5 2 2
#> 5 3 3 4 5
#> 6 2 1 1 2
#> Seat.comfort Inflight.entertainment On.board.service Leg.room.service
#> 1 5 5 4 3
#> 2 1 1 1 5
#> 3 5 5 4 3
#> 4 2 2 2 5
#> 5 5 3 3 4
#> 6 1 1 3 4
#> Baggage.handling Checkin.service Inflight.service Cleanliness
#> 1 4 4 5 5
#> 2 3 1 4 1
#> 3 4 4 4 5
#> 4 3 1 4 2
#> 5 4 3 3 3
#> 6 4 4 4 1
#> Departure.Delay.in.Minutes satisfaction_score
#> 1 25 neutral or dissatisfied
#> 2 1 neutral or dissatisfied
#> 3 0 satisfied
#> 4 11 neutral or dissatisfied
#> 5 0 satisfied
#> 6 0 neutral or dissatisfied
FactoMineR menyediakan fungsi untuk membuat PCA yaitu PCA() dan memvisualisasikan PCA yaitu plot.PCA(). Visualisasi PCA dengan FactoMineR dapat melibatkan variabel kategorik untuk membedakan informasi kategori dengan cara memberikan warna pada titik-titik amatan sehingga plot lebih informatif.
# Positions of the categorical columns in `airplane_facto`, looked up by name
# so they stay correct if the column order ever changes (currently 10, 15, 18).
qualivar <- match(
  c("Inflight.entertainment", "Inflight.service", "satisfaction_score"),
  names(airplane_facto)
)
# Fail early if any of the expected columns is missing.
stopifnot(!anyNA(qualivar))
Notes:
- prcomp(): secara default membuat PC sejumlah variabel awal
- PCA(): secara default hanya akan membuatkan 5 PC awal (PC1-PC5); jika ingin dihasilkan PC sejumlah variabel awal, tambahkan parameter ncp = jumlah variabel awal
# membuat PCA
library(FactoMineR)
# Run the PCA with FactoMineR; the columns listed in `qualivar` are passed as
# supplementary categorical variables.
airplane_pca_facto <- PCA(
  X = airplane_facto, # input data
  scale.unit = TRUE, # standardize the variables first
  quali.sup = qualivar, # positions of the categorical columns in the data
  graph = FALSE, # no automatic plot; plotting is done with plot.PCA()
  ncp = 11 # number of PCs to keep (PCA() keeps 5 by default)
)
# plot.PCA(): graph of individuals
plot.PCA(
  airplane_pca_facto, # PCA result object
  choix = "ind", # plot the individuals (observations)
  select = "contrib3", # highlight the 3 observations with the largest contribution
  habillage = "satisfaction_score", # colour the points by this categorical column
  invisible = "quali" # hide the category labels (they clutter the plot)
)
Dapat dilihat persebaran data tentang satisfaction_score menggunakan scatter plot; persebaran customer satisfied dominan ke arah kanan atas dan kanan bawah dari PCA Graph of Individuals.
Selanjutnya kita lanjutkan menampilkan PCA Graph of Variables agar dapat diketahui ada variabel apa saja yang berada di daerah kanan atas dan kanan bawah untuk mengetahui variable apa saja yang memiliki korelasi yang tinggi dalam menentukan customer satisfaction.
# Draw the PCA graph of variables for the same FactoMineR result.
plot.PCA(airplane_pca_facto,
choix = "var") # plot variable
Berdasarkan PCA graph of variables dapat kita tentukan bahwa yang menjadi faktor utama customer satisfaction adalah Cleanliness, Seat.comfort, dan Online.boarding. Untuk dataset yang digunakan tidak dapat dilanjutkan ke proses k-means, dikarenakan tidak adanya variable yang tidak berulang (special). Untuk dataset ini kita dapat mengambil insight business untuk mengetahui variable/faktor apa saja yang memiliki korelasi yang tinggi untuk mendapatkan customer satisfaction.