Which factors are driving consumer experience? This is a common business question that can be answered by key driver analysis. Let’s dive in!
Total.data<- read.csv("g:/Portfolio Projects/RWA/Data/airline_passenger_satisfaction.csv")
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## embedded nul(s) found in input
str(Total.data)
## 'data.frame': 129558 obs. of 25 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Gender : chr "Male" "Female" "Male" "Male" ...
## $ Age : int 48 35 41 50 49 43 43 60 50 38 ...
## $ Customer.Type : chr "First-time" "Returning" "Returning" "Returning" ...
## $ Type.of.Travel : chr "Business" "Business" "Business" "Business" ...
## $ Class : chr "Business" "Business" "Business" "Business" ...
## $ Flight.Distance : int 821 821 853 1905 3470 3788 1963 853 2607 2822 ...
## $ Departure.Delay : int 2 26 0 0 0 0 0 0 0 13 ...
## $ Arrival.Delay : int 5 39 0 0 1 0 0 3 0 0 ...
## $ Departure.and.Arrival.Time.Convenience: int 3 2 4 2 3 4 3 3 1 2 ...
## $ Ease.of.Online.Booking : chr "3" "2" "4" "2" ...
## $ Check.in.Service : int 4 3 4 3 3 3 4 3 3 3 ...
## $ Online.Boarding : int 3 5 5 4 5 5 4 4 2 5 ...
## $ Gate.Location : int 3 2 4 2 3 4 3 4 1 2 ...
## $ On.board.Service : int 3 5 3 5 3 4 5 3 4 5 ...
## $ Seat.Comfort : int 5 4 5 5 4 4 5 4 3 4 ...
## $ Leg.Room.Service : int 2 5 3 5 4 4 5 4 4 5 ...
## $ Cleanliness : int 5 5 5 4 5 3 4 4 3 4 ...
## $ Food.and.Drink : int 5 3 5 4 4 3 5 4 3 2 ...
## $ In.flight.Service : int 5 5 3 5 3 4 5 3 4 5 ...
## $ In.flight.Wifi.Service : int 3 2 4 2 3 4 3 4 4 2 ...
## $ In.flight.Entertainment : int 5 5 3 5 3 4 5 3 4 5 ...
## $ Baggage.Handling : int 5 5 3 5 3 4 5 3 4 5 ...
## $ Satisfaction : chr "Neutral or Dissatisfied" "Satisfied" "Satisfied" "Satisfied" ...
## $ Satisfaction.Input : int 2 5 4 5 4 5 4 5 2 5 ...
# RENAME DEPENDENT VAR TO "DVAR"
Total.data<- rename(Total.data, DVAR=Satisfaction.Input)
# examine column labels
names(Total.data)
## [1] "ID"
## [2] "Gender"
## [3] "Age"
## [4] "Customer.Type"
## [5] "Type.of.Travel"
## [6] "Class"
## [7] "Flight.Distance"
## [8] "Departure.Delay"
## [9] "Arrival.Delay"
## [10] "Departure.and.Arrival.Time.Convenience"
## [11] "Ease.of.Online.Booking"
## [12] "Check.in.Service"
## [13] "Online.Boarding"
## [14] "Gate.Location"
## [15] "On.board.Service"
## [16] "Seat.Comfort"
## [17] "Leg.Room.Service"
## [18] "Cleanliness"
## [19] "Food.and.Drink"
## [20] "In.flight.Service"
## [21] "In.flight.Wifi.Service"
## [22] "In.flight.Entertainment"
## [23] "Baggage.Handling"
## [24] "Satisfaction"
## [25] "DVAR"
# view the data table
kable(Total.data[1:5, 1:10], caption = "Examine the source data")
ID | Gender | Age | Customer.Type | Type.of.Travel | Class | Flight.Distance | Departure.Delay | Arrival.Delay | Departure.and.Arrival.Time.Convenience |
---|---|---|---|---|---|---|---|---|---|
1 | Male | 48 | First-time | Business | Business | 821 | 2 | 5 | 3 |
2 | Female | 35 | Returning | Business | Business | 821 | 26 | 39 | 2 |
3 | Male | 41 | Returning | Business | Business | 853 | 0 | 0 | 4 |
4 | Male | 50 | Returning | Business | Business | 1905 | 0 | 0 | 2 |
5 | Female | 49 | Returning | Business | Business | 3470 | 0 | 1 | 3 |
# create correlation matrix
res<- Total.data[c("DVAR","Departure.and.Arrival.Time.Convenience","Ease.of.Online.Booking","Check.in.Service","Online.Boarding",
"Gate.Location","On.board.Service","Seat.Comfort","Leg.Room.Service","Cleanliness","Food.and.Drink",
"In.flight.Service","In.flight.Wifi.Service","In.flight.Entertainment","Baggage.Handling")]
# ensure all variables are numeric
res<- res %>% mutate_if(is.character, as.numeric)
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# correlation matrix
res<- cor(res, use = "complete.obs")
res<- round(res, digits = 4)
# set predictors and response
ys<- as.numeric(which(colnames(res)=="DVAR"))
xs<- as.numeric(which(colnames(res)=="DVAR")+1):ncol(res)
# RWA
results<- relWt(res, ys, xs)
results_final_RWA<- results$eps
# rescale Everything
Rescaled<- sweep(results_final_RWA, 1, colSums(results_final_RWA),"/")
Rescaled<- rename(Rescaled, Rescaled_Total=EPS)
results_final_RWA<- cbind(results_final_RWA, Rescaled)
results_final_RWA<- rename(results_final_RWA, Total=EPS)
# results are ratio-scaled to sum to 100%
kable(results_final_RWA, caption = "Ratio-scaled results for total sample")
Total | Rescaled_Total | |
---|---|---|
Departure.and.Arrival.Time.Convenience | 0.0085802 | 0.0284155 |
Ease.of.Online.Booking | 0.0089013 | 0.0294789 |
Check.in.Service | 0.0156018 | 0.0516693 |
Online.Boarding | 0.0995493 | 0.3296829 |
Gate.Location | 0.0010319 | 0.0034173 |
On.board.Service | 0.0245334 | 0.0812487 |
Seat.Comfort | 0.0252866 | 0.0837430 |
Leg.Room.Service | 0.0297378 | 0.0984842 |
Cleanliness | 0.0155009 | 0.0513351 |
Food.and.Drink | 0.0067804 | 0.0224549 |
In.flight.Service | 0.0096491 | 0.0319553 |
In.flight.Wifi.Service | 0.0213633 | 0.0707499 |
In.flight.Entertainment | 0.0252480 | 0.0836153 |
Baggage.Handling | 0.0101909 | 0.0337497 |
# RENAME audience VAR TO generic "audience"
Total.data.cuts<- rename(Total.data, audience=Customer.Type)
# store labels from character variable
audience.labels<- unique(Total.data.cuts$audience)
# convert character variable to factor
Total.data.cuts$audience<- Total.data.cuts$audience %>%
as_factor() %>%
as.numeric()
cuts <- Total.data.cuts %>% select(audience) %>% distinct() %>% pull() %>% sort()
# run loop
for (x in cuts) {
# create covariance matrix
audience_res <- Total.data.cuts
audience_res <- filter(audience_res, audience == x)
audience_res <- audience_res[c("DVAR","Departure.and.Arrival.Time.Convenience","Ease.of.Online.Booking","Check.in.Service","Online.Boarding",
"Gate.Location","On.board.Service","Seat.Comfort","Leg.Room.Service","Cleanliness","Food.and.Drink",
"In.flight.Service","In.flight.Wifi.Service","In.flight.Entertainment","Baggage.Handling")]
# ensure all variables are numeric
audience_res<- audience_res %>% mutate_if(is.character, as.numeric)
audience_res <- cor(audience_res,use = "complete.obs")
audience_res <- round(audience_res,digits = 4)
# set predictors and response
audience_ys<- as.numeric(which(colnames(audience_res)=="DVAR"))
audience_xs<- as.numeric(which(colnames(audience_res)=="DVAR")+1):ncol(audience_res)
if (x == 1) {
# for first iteration, create results dataframe
audience_results <- relWt(audience_res, audience_ys, audience_xs)
audience_results_eps <- audience_results[["eps"]]
audience_results_final_RWA <- cbind(Attributes = rownames(audience_results_eps), audience_results_eps) %>%
rename(!!paste0(audience.labels[x]) := EPS)
rownames(audience_results_final_RWA) <- 1:nrow(audience_results_eps)
# rescale Everything
Rescaled<- sweep(audience_results_final_RWA[paste0(audience.labels[x])], 1, colSums(audience_results_final_RWA[paste0(audience.labels[x])]),"/")
Rescaled<- Rescaled %>% rename(!!paste0("Rescaled_",audience.labels[x]) := audience.labels[x])
audience_results_final_RWA<- cbind(audience_results_final_RWA, Rescaled)
} else {
# for following iterations, just add results column for audience
audience_results <- relWt(audience_res, audience_ys, audience_xs)
audience_results_eps <- audience_results[["eps"]] %>%
rename(!!paste0(audience.labels[x]) := EPS)
audience_results_final_RWA <- cbind(audience_results_final_RWA, audience_results_eps)
# rescale Everything
Rescaled<- sweep(audience_results_final_RWA[paste0(audience.labels[x])], 1, colSums(audience_results_final_RWA[paste0(audience.labels[x])]),"/")
Rescaled<- Rescaled %>% rename(!!paste0("Rescaled_",audience.labels[x]) := audience.labels[x])
audience_results_final_RWA<- cbind(audience_results_final_RWA, Rescaled)
}
}
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# results are ratio-scaled to sum to 100%
kable(audience_results_final_RWA, caption = "Ratio-scaled results for customer type")
Attributes | First-time | Rescaled_First-time | Returning | Rescaled_Returning |
---|---|---|---|---|
Departure.and.Arrival.Time.Convenience | 0.0023367 | 0.0133100 | 0.0222954 | 0.0665209 |
Ease.of.Online.Booking | 0.0366404 | 0.2087082 | 0.0063041 | 0.0188090 |
Check.in.Service | 0.0161606 | 0.0920528 | 0.0152970 | 0.0456403 |
Online.Boarding | 0.0311006 | 0.1771529 | 0.0888067 | 0.2649646 |
Gate.Location | 0.0028289 | 0.0161135 | 0.0033788 | 0.0100810 |
On.board.Service | 0.0135534 | 0.0772017 | 0.0249313 | 0.0743854 |
Seat.Comfort | 0.0001362 | 0.0007757 | 0.0326329 | 0.0973639 |
Leg.Room.Service | 0.0021022 | 0.0119744 | 0.0368760 | 0.1100236 |
Cleanliness | 0.0004076 | 0.0023220 | 0.0227108 | 0.0677600 |
Food.and.Drink | 0.0004985 | 0.0028398 | 0.0087306 | 0.0260488 |
In.flight.Service | 0.0109950 | 0.0626286 | 0.0106649 | 0.0318200 |
In.flight.Wifi.Service | 0.0467648 | 0.2663781 | 0.0178001 | 0.0531085 |
In.flight.Entertainment | 0.0003608 | 0.0020551 | 0.0335170 | 0.1000018 |
Baggage.Handling | 0.0116724 | 0.0664872 | 0.0112187 | 0.0334721 |
# combine the results from total sample and customer type
results_RWA<- cbind(audience_results_final_RWA, results_final_RWA)
# let's take a look at the final data table we'll export to Excel
kable(results_RWA, caption = "Final RWA results")
Attributes | First-time | Rescaled_First-time | Returning | Rescaled_Returning | Total | Rescaled_Total |
---|---|---|---|---|---|---|
Departure.and.Arrival.Time.Convenience | 0.0023367 | 0.0133100 | 0.0222954 | 0.0665209 | 0.0085802 | 0.0284155 |
Ease.of.Online.Booking | 0.0366404 | 0.2087082 | 0.0063041 | 0.0188090 | 0.0089013 | 0.0294789 |
Check.in.Service | 0.0161606 | 0.0920528 | 0.0152970 | 0.0456403 | 0.0156018 | 0.0516693 |
Online.Boarding | 0.0311006 | 0.1771529 | 0.0888067 | 0.2649646 | 0.0995493 | 0.3296829 |
Gate.Location | 0.0028289 | 0.0161135 | 0.0033788 | 0.0100810 | 0.0010319 | 0.0034173 |
On.board.Service | 0.0135534 | 0.0772017 | 0.0249313 | 0.0743854 | 0.0245334 | 0.0812487 |
Seat.Comfort | 0.0001362 | 0.0007757 | 0.0326329 | 0.0973639 | 0.0252866 | 0.0837430 |
Leg.Room.Service | 0.0021022 | 0.0119744 | 0.0368760 | 0.1100236 | 0.0297378 | 0.0984842 |
Cleanliness | 0.0004076 | 0.0023220 | 0.0227108 | 0.0677600 | 0.0155009 | 0.0513351 |
Food.and.Drink | 0.0004985 | 0.0028398 | 0.0087306 | 0.0260488 | 0.0067804 | 0.0224549 |
In.flight.Service | 0.0109950 | 0.0626286 | 0.0106649 | 0.0318200 | 0.0096491 | 0.0319553 |
In.flight.Wifi.Service | 0.0467648 | 0.2663781 | 0.0178001 | 0.0531085 | 0.0213633 | 0.0707499 |
In.flight.Entertainment | 0.0003608 | 0.0020551 | 0.0335170 | 0.1000018 | 0.0252480 | 0.0836153 |
Baggage.Handling | 0.0116724 | 0.0664872 | 0.0112187 | 0.0334721 | 0.0101909 | 0.0337497 |
# SAVE OUTPUTS
wb <- createWorkbook()
addWorksheet(wb,"RWA")
writeData(wb,sheet = "RWA",results_RWA, rowNames = FALSE)
############################
# SAVE FILE
saveWorkbook(wb, "g:/Portfolio Projects/RWA/RWA Results v1_.xlsx", overwrite = TRUE)
# create plots
colnames(results_RWA)
## [1] "Attributes" "First-time" "Rescaled_First-time"
## [4] "Returning" "Rescaled_Returning" "Total"
## [7] "Rescaled_Total"
# replace "-" in colnames
names(results_RWA)<- gsub("\\-", "_", names(results_RWA))
# verify updated names
colnames(results_RWA)
## [1] "Attributes" "First_time" "Rescaled_First_time"
## [4] "Returning" "Rescaled_Returning" "Total"
## [7] "Rescaled_Total"
# First_time
# sort df
sorted_Rescaled_First_time<- results_RWA[order(-results_RWA$Rescaled_First_time),]
sorted_Rescaled_First_time$Attributes <- factor(sorted_Rescaled_First_time$Attributes,
levels = sorted_Rescaled_First_time$Attributes[order(sorted_Rescaled_First_time$Rescaled_First_time)])
# create the Rescaled_First_time plot
Rescaled_First_time_p <- ggplot(data = sorted_Rescaled_First_time, aes(x = Attributes, y = Rescaled_First_time)) +
geom_bar(stat = "identity", fill = "steelblue") +
theme_minimal() +
labs(x = "Attributes", y = "Rescaled_First_time", title = "Bar Chart of Rescaled_First_time") +
scale_y_continuous(labels = scales::percent_format(scale = 100)) +
coord_flip()
Rescaled_First_time_p
# save to .png
ggsave("Rescaled_First_time_p.png", width = 40, height = 20, units = "cm")
# Returning
# sort df
sorted_Rescaled_Returning<- results_RWA[order(-results_RWA$Rescaled_Returning),]
sorted_Rescaled_Returning$Attributes <- factor(sorted_Rescaled_Returning$Attributes,
levels = sorted_Rescaled_Returning$Attributes[order(sorted_Rescaled_Returning$Rescaled_Returning)])
# create the Rescaled_Returning plot
Rescaled_Returning_p <- ggplot(data = sorted_Rescaled_Returning, aes(x = Attributes, y = Rescaled_Returning)) +
geom_bar(stat = "identity", fill = "steelblue") +
theme_minimal() +
labs(x = "Attributes", y = "Rescaled_Returning", title = "Bar Chart of Rescaled_Returning") +
scale_y_continuous(labels = scales::percent_format(scale = 100)) +
coord_flip()
Rescaled_Returning_p
# save to .png
ggsave("Rescaled_Returning_p.png", width = 40, height = 20, units = "cm")
# Total
# sort df
sorted_Rescaled_Total<- results_RWA[order(-results_RWA$Rescaled_Total),]
sorted_Rescaled_Total$Attributes <- factor(sorted_Rescaled_Total$Attributes,
levels = sorted_Rescaled_Total$Attributes[order(sorted_Rescaled_Total$Rescaled_Total)])
# create the Total plot
Total_p <- ggplot(data = sorted_Rescaled_Total, aes(x = Attributes, y = Rescaled_Total)) +
geom_bar(stat = "identity", fill = "steelblue") +
theme_minimal() +
labs(x = "Attributes", y = "Rescaled Total", title = "Bar Chart of Rescaled Total") +
scale_y_continuous(labels = scales::percent_format(scale = 100)) +
coord_flip()
Total_p
# save to .png
ggsave("Total_p.png", width = 40, height = 20, units = "cm")
That concludes the RWA!
Let’s consider some of the insights we’ve gained from this analysis.
For first-time customers In-flight Wi-Fi Service, Ease of Online Booking, and Online Boarding are the top 3 factors driving their overall satisfaction. For returning customer Online Boarding, Leg Room Service, and In-flight Entertainment are the top 3 factors driving their overall satisfaction.
Equipped with this knowledge the airline can adjust their services to efficiently maximize customer satisfaction and ideally keep their customers coming back.
In a competitive landscape the airline could also collect the same information for their top competitors and we can model their data as well. We can see where competitors are leading or lagging in the attributes and the airline can adjust services to gain new customers.