Method(s)
Load necessary libraries
library(readxl)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(pscl)
## Classes and Methods for R originally developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University (2002-2015),
## by and under the direction of Simon Jackman.
## hurdle and zeroinfl functions by Achim Zeileis.
Load the dataset
# Read the passenger survey workbook from the working directory.
data <- readxl::read_excel(path = "airline_passenger_satisfaction.xlsx")
Describing the variables
Dependent Variable:
Satisfaction ("Satisfied", "Neutral or Dissatisfied")
Converted to Binary:
1: Satisfied
0: Neutral or Dissatisfied
Independent Variables:
Departure.and.Arrival.Time.Convenience, Ease.of.Online.Booking, Check.in.Service, In-Flight Service and Comfort, On.board.Service, Online.Boarding, Seat.Comfort, Leg.Room.Service, Cleanliness, Food.and.Drink, In.flight.Service, In.flight.Wifi.Service,In.flight.Entertainment, Baggage.Handling, Gate.Location
(Delays and other variables with an obvious link to dissatisfaction were excluded from the factors.)
Convert columns to factors to make the data clearer and to create bar
charts: R will treat these score variables as categorical rather than
numeric data.
# Treat the outcome and every rating column as categorical so that summary()
# reports per-level counts and each score gets its own bar in the charts.
#
# Every column is converted from itself, all in one step. The earlier version
# built Online.Boarding from Ease.of.Online.Booking, which overwrote the real
# boarding scores (the two columns then showed identical level counts and a
# correlation of exactly 1). Any recorded output that involves Online.Boarding
# must be regenerated after this fix.
data <- dplyr::mutate(
  data,
  dplyr::across(
    dplyr::all_of(c(
      "Satisfaction",
      "Departure.and.Arrival.Time.Convenience",
      "Ease.of.Online.Booking",
      "Check.in.Service",
      "Online.Boarding",
      "On.board.Service",
      "Seat.Comfort",
      "Leg.Room.Service",
      "Food.and.Drink",
      "In.flight.Service",
      "In.flight.Wifi.Service",
      "In.flight.Entertainment",
      "Baggage.Handling",
      "Gate.Location",
      "Cleanliness"
    )),
    as.factor
  )
)
Summary stats
# Print a per-column overview of the whole data set.
summary(object = data)
## ID Gender Age Customer Type
## Min. : 1 Length:129880 Min. : 7.00 Length:129880
## 1st Qu.: 32471 Class :character 1st Qu.:27.00 Class :character
## Median : 64940 Mode :character Median :40.00 Mode :character
## Mean : 64940 Mean :39.43
## 3rd Qu.: 97410 3rd Qu.:51.00
## Max. :129880 Max. :85.00
##
## Type of Travel Class Flight Distance Departure Delay
## Length:129880 Length:129880 Min. : 31 Min. : 0.00
## Class :character Class :character 1st Qu.: 414 1st Qu.: 0.00
## Mode :character Mode :character Median : 844 Median : 0.00
## Mean :1190 Mean : 14.71
## 3rd Qu.:1744 3rd Qu.: 12.00
## Max. :4983 Max. :1592.00
##
## Arrival Delay Departure.and.Arrival.Time.Convenience
## Min. : 0.00 0: 6681
## 1st Qu.: 0.00 1:19409
## Median : 0.00 2:21534
## Mean : 15.09 3:22378
## 3rd Qu.: 13.00 4:31880
## Max. :1584.00 5:27998
## NA's :393
## Ease.of.Online.Booking Check.in.Service Online.Boarding Gate.Location
## 0: 5682 0: 1 0: 5682 0: 1
## 1:21886 1:16108 1:21886 1:21991
## 2:30051 2:16102 2:30051 2:24296
## 3:30393 3:35453 3:30393 3:35717
## 4:24444 4:36333 4:24444 4:30466
## 5:17424 5:25883 5:17424 5:17409
##
## On.board.Service Seat.Comfort Leg.Room.Service Cleanliness Food.and.Drink
## 0: 5 0: 1 0: 598 0: 14 0: 132
## 1:14787 1:15108 1:12895 1:16729 1:16051
## 2:18351 2:18529 2:24540 2:20113 2:27383
## 3:28542 3:23328 3:25056 3:30639 3:27794
## 4:38703 4:39756 4:35886 4:33969 4:30563
## 5:29492 5:33158 5:30905 5:28416 5:27957
##
## In.flight.Service In.flight.Wifi.Service In.flight.Entertainment
## 0: 5 0: 3916 0: 18
## 1: 8862 1:22328 1:15675
## 2:14308 2:32320 2:21968
## 3:25316 3:32185 3:23884
## 4:47323 4:24775 4:36791
## 5:34066 5:14356 5:31544
##
## Baggage.Handling Satisfaction
## 1: 9028 Neutral or Dissatisfied:73452
## 2:14362 Satisfied :56428
## 3:25851
## 4:46761
## 5:33878
##
##
Visualization: Satisfaction factors of categorical data (5
scores)
Filter data for satisfied passengers only
# Keep only the rows whose Satisfaction value is "Satisfied".
satisfied_data <- dplyr::filter(data, Satisfaction == "Satisfied")
Create a bar chart of satisfaction scores for 14 factors
# Build a horizontal bar chart counting how many passengers gave each score
# for one rated factor.
#
# factor_name: name of the score column to plot (a single string).
# df:          data frame holding that column; defaults to the satisfied-only
#              subset created above.
# Returns the ggplot object so the caller decides when to draw it.
plot_satisfied_scores <- function(factor_name, df = satisfied_data) {
  ggplot(data = df, aes(x = factor(.data[[factor_name]]))) +
    geom_bar(stat = "count", width = 0.5, fill = "steelblue") +
    labs(
      title = paste(factor_name, "Scores for Satisfied Passengers"),
      # Every chart now uses the same "<factor> Score" axis label; the
      # Departure/Arrival chart previously lacked the " Score" suffix.
      x = paste(factor_name, "Score"),
      y = "Count"
    ) +
    theme_minimal() +
    coord_flip()
}

# The 14 rated factors, in the order the charts were originally drawn.
score_factors <- c(
  "Cleanliness",
  "Departure.and.Arrival.Time.Convenience",
  "Ease.of.Online.Booking",
  "Check.in.Service",
  "Online.Boarding",
  "On.board.Service",
  "Seat.Comfort",
  "Leg.Room.Service",
  "Food.and.Drink",
  "In.flight.Service",
  "In.flight.Wifi.Service",
  "In.flight.Entertainment",
  "Baggage.Handling",
  "Gate.Location"
)

# ggplot objects are not auto-printed inside a loop, so print() is explicit.
for (factor_name in score_factors) {
  print(plot_satisfied_scores(factor_name))
}

Interpretation: Factors with a high proportion of scores below 2, such as 'Departure.and.Arrival.Time.Convenience', 'Ease.of.Online.Booking', 'Online.Boarding', and 'Gate.Location', are the strongest candidates for improvement.
Convert Satisfaction to binary: 1 for “Satisfied”, 0 for “Neutral or
Dissatisfied”
# Numeric outcome flag: 1 when Satisfaction is "Satisfied", otherwise 0.
data$Satisfaction_Binary <- as.numeric(data$Satisfaction == "Satisfied")
Convert factor columns to numeric
# Turn the score factors back into numbers for the correlation and regression
# steps. Going through as.character() recovers the printed score labels;
# as.numeric() on a factor directly would return the internal level codes.
# Gate.Location is not part of this list and therefore stays a factor.
data <- dplyr::mutate(
  data,
  dplyr::across(
    dplyr::all_of(c(
      "Departure.and.Arrival.Time.Convenience",
      "Ease.of.Online.Booking",
      "Check.in.Service",
      "On.board.Service",
      "Online.Boarding",
      "Seat.Comfort",
      "Leg.Room.Service",
      "Cleanliness",
      "Food.and.Drink",
      "In.flight.Service",
      "In.flight.Wifi.Service",
      "In.flight.Entertainment",
      "Baggage.Handling"
    )),
    function(score) as.numeric(as.character(score))
  )
)
Calculate correlation between Satisfaction_Binary and Other
factors
# Pairwise correlations among the 13 numeric score columns and the binary
# satisfaction flag; the last row/column relates each factor to satisfaction.
corr <- data %>%
  dplyr::select(dplyr::all_of(c(
    "Departure.and.Arrival.Time.Convenience",
    "Ease.of.Online.Booking",
    "Check.in.Service",
    "On.board.Service",
    "Online.Boarding",
    "Seat.Comfort",
    "Leg.Room.Service",
    "Cleanliness",
    "Food.and.Drink",
    "In.flight.Service",
    "In.flight.Wifi.Service",
    "In.flight.Entertainment",
    "Baggage.Handling",
    "Satisfaction_Binary"
  ))) %>%
  cor()
corr
## Departure.and.Arrival.Time.Convenience
## Departure.and.Arrival.Time.Convenience 1.0000000000
## Ease.of.Online.Booking 0.4376196545
## Check.in.Service 0.0911317589
## On.board.Service 0.0672969787
## Online.Boarding 0.4376196545
## Seat.Comfort 0.0086664448
## Leg.Room.Service 0.0106171078
## Cleanliness 0.0098620846
## Food.and.Drink 0.0006866832
## In.flight.Service 0.0721948030
## In.flight.Wifi.Service 0.3449151814
## In.flight.Entertainment -0.0083800141
## Baggage.Handling 0.0708330359
## Satisfaction_Binary -0.0542697105
## Ease.of.Online.Booking Check.in.Service
## Departure.and.Arrival.Time.Convenience 0.437619655 0.091131759
## Ease.of.Online.Booking 1.000000000 0.008819308
## Check.in.Service 0.008819308 1.000000000
## On.board.Service 0.039064190 0.244618669
## Online.Boarding 1.000000000 0.008819308
## Seat.Comfort 0.028560733 0.189979117
## Leg.Room.Service 0.109449655 0.152693216
## Cleanliness 0.015124786 0.176658031
## Food.and.Drink 0.030513982 0.085197877
## In.flight.Service 0.035372567 0.237601243
## In.flight.Wifi.Service 0.714806849 0.043762366
## In.flight.Entertainment 0.046563505 0.119554033
## Baggage.Handling 0.039148282 0.234503128
## Satisfaction_Binary 0.168877139 0.237252360
## On.board.Service Online.Boarding
## Departure.and.Arrival.Time.Convenience 0.06729698 0.437619655
## Ease.of.Online.Booking 0.03906419 1.000000000
## Check.in.Service 0.24461867 0.008819308
## On.board.Service 1.00000000 0.039064190
## Online.Boarding 0.03906419 1.000000000
## Seat.Comfort 0.13054488 0.028560733
## Leg.Room.Service 0.35772132 0.109449655
## Cleanliness 0.12208376 0.015124786
## Food.and.Drink 0.05740401 0.030513982
## In.flight.Service 0.55156883 0.035372567
## In.flight.Wifi.Service 0.11992768 0.714806849
## In.flight.Entertainment 0.41857358 0.046563505
## Baggage.Handling 0.52029553 0.039148282
## Satisfaction_Binary 0.32220482 0.168877139
## Seat.Comfort Leg.Room.Service
## Departure.and.Arrival.Time.Convenience 0.008666445 0.01061711
## Ease.of.Online.Booking 0.028560733 0.10944966
## Check.in.Service 0.189979117 0.15269322
## On.board.Service 0.130544875 0.35772132
## Online.Boarding 0.028560733 0.10944966
## Seat.Comfort 1.000000000 0.10427240
## Leg.Room.Service 0.104272400 1.00000000
## Cleanliness 0.679613003 0.09669472
## Food.and.Drink 0.575846177 0.03317279
## In.flight.Service 0.068842149 0.36956948
## In.flight.Wifi.Service 0.121513245 0.16031696
## In.flight.Entertainment 0.611836657 0.30039744
## Baggage.Handling 0.074619552 0.37145468
## Satisfaction_Binary 0.348829346 0.31242382
## Cleanliness Food.and.Drink
## Departure.and.Arrival.Time.Convenience 0.009862085 0.0006866832
## Ease.of.Online.Booking 0.015124786 0.0305139818
## Check.in.Service 0.176658031 0.0851978765
## On.board.Service 0.122083757 0.0574040099
## Online.Boarding 0.015124786 0.0305139818
## Seat.Comfort 0.679613003 0.5758461771
## Leg.Room.Service 0.096694724 0.0331727940
## Cleanliness 1.000000000 0.6580539298
## Food.and.Drink 0.658053930 1.0000000000
## In.flight.Service 0.090355980 0.0352096628
## In.flight.Wifi.Service 0.131299526 0.1322138724
## In.flight.Entertainment 0.692510538 0.6234609372
## Baggage.Handling 0.097071490 0.0353207442
## Satisfaction_Binary 0.307034671 0.2113402076
## In.flight.Service In.flight.Wifi.Service
## Departure.and.Arrival.Time.Convenience 0.07219480 0.34491518
## Ease.of.Online.Booking 0.03537257 0.71480685
## Check.in.Service 0.23760124 0.04376237
## On.board.Service 0.55156883 0.11992768
## Online.Boarding 0.03537257 0.71480685
## Seat.Comfort 0.06884215 0.12151324
## Leg.Room.Service 0.36956948 0.16031696
## Cleanliness 0.09035598 0.13129953
## Food.and.Drink 0.03520966 0.13221387
## In.flight.Service 1.00000000 0.11002855
## In.flight.Wifi.Service 0.11002855 1.00000000
## In.flight.Entertainment 0.40609361 0.20780165
## Baggage.Handling 0.62923720 0.12037590
## Satisfaction_Binary 0.24491784 0.28346023
## In.flight.Entertainment Baggage.Handling
## Departure.and.Arrival.Time.Convenience -0.008380014 0.07083304
## Ease.of.Online.Booking 0.046563505 0.03914828
## Check.in.Service 0.119554033 0.23450313
## On.board.Service 0.418573575 0.52029553
## Online.Boarding 0.046563505 0.03914828
## Seat.Comfort 0.611836657 0.07461955
## Leg.Room.Service 0.300397442 0.37145468
## Cleanliness 0.692510538 0.09707149
## Food.and.Drink 0.623460937 0.03532074
## In.flight.Service 0.406093608 0.62923720
## In.flight.Wifi.Service 0.207801648 0.12037590
## In.flight.Entertainment 1.000000000 0.37912276
## Baggage.Handling 0.379122757 1.00000000
## Satisfaction_Binary 0.398233651 0.24867992
## Satisfaction_Binary
## Departure.and.Arrival.Time.Convenience -0.05426971
## Ease.of.Online.Booking 0.16887714
## Check.in.Service 0.23725236
## On.board.Service 0.32220482
## Online.Boarding 0.16887714
## Seat.Comfort 0.34882935
## Leg.Room.Service 0.31242382
## Cleanliness 0.30703467
## Food.and.Drink 0.21134021
## In.flight.Service 0.24491784
## In.flight.Wifi.Service 0.28346023
## In.flight.Entertainment 0.39823365
## Baggage.Handling 0.24867992
## Satisfaction_Binary 1.00000000
Interpretation: In.flight.Entertainment (0.40), Seat.Comfort (0.35), On.board.Service (0.32), Leg.Room.Service (0.31), and Cleanliness (0.31) are the five factors most strongly correlated with satisfaction.
Logistic regression model
# Logistic regression of the binary satisfaction flag on the five score
# factors that correlate most strongly with it. The stray trailing comma
# after `data = data` has been removed so the call carries no empty argument.
model <- glm(
  Satisfaction_Binary ~ In.flight.Entertainment + Seat.Comfort +
    Leg.Room.Service + On.board.Service + Cleanliness,
  family = "binomial",
  data = data
)
# Coefficient table, deviances and AIC for the fitted model.
summary(model)
##
## Call:
## glm(formula = Satisfaction_Binary ~ In.flight.Entertainment +
## Seat.Comfort + Leg.Room.Service + On.board.Service + Cleanliness,
## family = "binomial", data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.240091 0.033127 -158.18 <2e-16 ***
## In.flight.Entertainment 0.185839 0.007925 23.45 <2e-16 ***
## Seat.Comfort 0.392795 0.006979 56.29 <2e-16 ***
## Leg.Room.Service 0.377576 0.005475 68.96 <2e-16 ***
## On.board.Service 0.358746 0.006020 59.59 <2e-16 ***
## Cleanliness 0.124844 0.007776 16.06 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 177814 on 129879 degrees of freedom
## Residual deviance: 141796 on 129874 degrees of freedom
## AIC: 141808
##
## Number of Fisher Scoring iterations: 4
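Interpretation: The estimated logistic regression equation is $$E(y) = \frac{e^{-5.24 + 0.19x_1 + 0.39x_2 + 0.38x_3 + 0.36x_4 + 0.12x_5}}{1 + e^{-5.24 + 0.19x_1 + 0.39x_2 + 0.38x_3 + 0.36x_4 + 0.12x_5}}$$ where $x_1$ = In.flight.Entertainment, $x_2$ = Seat.Comfort, $x_3$ = Leg.Room.Service, $x_4$ = On.board.Service, and $x_5$ = Cleanliness.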
Calculate Odds Ratios
# Exponentiate the fitted log-odds coefficients to express them as odds ratios.
odds_ratios <- exp(coef(model))
odds_ratios
## (Intercept) In.flight.Entertainment Seat.Comfort
## 0.005299773 1.204228185 1.481114008
## Leg.Room.Service On.board.Service Cleanliness
## 1.458744715 1.431532725 1.132971908
Interpretation:
For each one-unit increase in the in-flight entertainment score, the odds of being satisfied increase by 20.42%, holding the other factors constant.
For each one-unit increase in the seat comfort score, the odds of being satisfied increase by 48.11%, holding the other factors constant.
For each one-unit increase in the leg room service score, the odds of being satisfied increase by 45.87%, holding the other factors constant.
For each one-unit increase in the on-board service score, the odds of being satisfied increase by 43.15%, holding the other factors constant.
For each one-unit increase in the cleanliness score, the odds of being satisfied increase by 13.30%, holding the other factors constant.
Calculate McFadden’s R-squared
# Pseudo R-squared measures for the logistic model (McFadden among them).
pscl::pR2(object = model)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -7.089797e+04 -8.890703e+04 3.601812e+04 2.025606e-01 2.421869e-01
## r2CU
## 3.247974e-01
Interpretation: McFadden's R-squared value is 0.20, which indicates a moderate to good model fit in the context of logistic regression.