R Markdown

Which factors are driving consumer experience? This is a common business question that can be answered by key driver analysis. Let’s dive in!

Read in the data file

Total.data<- read.csv("g:/Portfolio Projects/RWA/Data/airline_passenger_satisfaction.csv")
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## embedded nul(s) found in input
str(Total.data)
## 'data.frame':    129558 obs. of  25 variables:
##  $ ID                                    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Gender                                : chr  "Male" "Female" "Male" "Male" ...
##  $ Age                                   : int  48 35 41 50 49 43 43 60 50 38 ...
##  $ Customer.Type                         : chr  "First-time" "Returning" "Returning" "Returning" ...
##  $ Type.of.Travel                        : chr  "Business" "Business" "Business" "Business" ...
##  $ Class                                 : chr  "Business" "Business" "Business" "Business" ...
##  $ Flight.Distance                       : int  821 821 853 1905 3470 3788 1963 853 2607 2822 ...
##  $ Departure.Delay                       : int  2 26 0 0 0 0 0 0 0 13 ...
##  $ Arrival.Delay                         : int  5 39 0 0 1 0 0 3 0 0 ...
##  $ Departure.and.Arrival.Time.Convenience: int  3 2 4 2 3 4 3 3 1 2 ...
##  $ Ease.of.Online.Booking                : chr  "3" "2" "4" "2" ...
##  $ Check.in.Service                      : int  4 3 4 3 3 3 4 3 3 3 ...
##  $ Online.Boarding                       : int  3 5 5 4 5 5 4 4 2 5 ...
##  $ Gate.Location                         : int  3 2 4 2 3 4 3 4 1 2 ...
##  $ On.board.Service                      : int  3 5 3 5 3 4 5 3 4 5 ...
##  $ Seat.Comfort                          : int  5 4 5 5 4 4 5 4 3 4 ...
##  $ Leg.Room.Service                      : int  2 5 3 5 4 4 5 4 4 5 ...
##  $ Cleanliness                           : int  5 5 5 4 5 3 4 4 3 4 ...
##  $ Food.and.Drink                        : int  5 3 5 4 4 3 5 4 3 2 ...
##  $ In.flight.Service                     : int  5 5 3 5 3 4 5 3 4 5 ...
##  $ In.flight.Wifi.Service                : int  3 2 4 2 3 4 3 4 4 2 ...
##  $ In.flight.Entertainment               : int  5 5 3 5 3 4 5 3 4 5 ...
##  $ Baggage.Handling                      : int  5 5 3 5 3 4 5 3 4 5 ...
##  $ Satisfaction                          : chr  "Neutral or Dissatisfied" "Satisfied" "Satisfied" "Satisfied" ...
##  $ Satisfaction.Input                    : int  2 5 4 5 4 5 4 5 2 5 ...
# RENAME DEPENDENT VAR TO "DVAR"
Total.data<- rename(Total.data, DVAR=Satisfaction.Input)

# examine column labels
names(Total.data)
##  [1] "ID"                                    
##  [2] "Gender"                                
##  [3] "Age"                                   
##  [4] "Customer.Type"                         
##  [5] "Type.of.Travel"                        
##  [6] "Class"                                 
##  [7] "Flight.Distance"                       
##  [8] "Departure.Delay"                       
##  [9] "Arrival.Delay"                         
## [10] "Departure.and.Arrival.Time.Convenience"
## [11] "Ease.of.Online.Booking"                
## [12] "Check.in.Service"                      
## [13] "Online.Boarding"                       
## [14] "Gate.Location"                         
## [15] "On.board.Service"                      
## [16] "Seat.Comfort"                          
## [17] "Leg.Room.Service"                      
## [18] "Cleanliness"                           
## [19] "Food.and.Drink"                        
## [20] "In.flight.Service"                     
## [21] "In.flight.Wifi.Service"                
## [22] "In.flight.Entertainment"               
## [23] "Baggage.Handling"                      
## [24] "Satisfaction"                          
## [25] "DVAR"
# view the data table
kable(Total.data[1:5, 1:10], caption = "Examine the source data")
Examine the source data
ID Gender Age Customer.Type Type.of.Travel Class Flight.Distance Departure.Delay Arrival.Delay Departure.and.Arrival.Time.Convenience
1 Male 48 First-time Business Business 821 2 5 3
2 Female 35 Returning Business Business 821 26 39 2
3 Male 41 Returning Business Business 853 0 0 4
4 Male 50 Returning Business Business 1905 0 0 2
5 Female 49 Returning Business Business 3470 0 1 3

Prep the data and perform relative weights analysis (RWA)

# create correlation matrix
res<- Total.data[c("DVAR","Departure.and.Arrival.Time.Convenience","Ease.of.Online.Booking","Check.in.Service","Online.Boarding",
                    "Gate.Location","On.board.Service","Seat.Comfort","Leg.Room.Service","Cleanliness","Food.and.Drink",
                    "In.flight.Service","In.flight.Wifi.Service","In.flight.Entertainment","Baggage.Handling")]

# ensure all variables are numeric
res<- res %>% mutate_if(is.character, as.numeric)
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# correlation matrix
res<- cor(res, use = "complete.obs")
res<- round(res, digits = 4)

# set predictors and response
ys<- as.numeric(which(colnames(res)=="DVAR"))
xs<- as.numeric(which(colnames(res)=="DVAR")+1):ncol(res)

# RWA    
results<- relWt(res, ys, xs)
results_final_RWA<- results$eps

# rescale Everything
Rescaled<- sweep(results_final_RWA, 1, colSums(results_final_RWA),"/")
Rescaled<- rename(Rescaled, Rescaled_Total=EPS)
results_final_RWA<- cbind(results_final_RWA, Rescaled)
results_final_RWA<- rename(results_final_RWA, Total=EPS)

# results are ratio-scaled to sum to 100%
kable(results_final_RWA, caption = "Ratio-scaled results for total sample")
Ratio-scaled results for total sample
Total Rescaled_Total
Departure.and.Arrival.Time.Convenience 0.0085802 0.0284155
Ease.of.Online.Booking 0.0089013 0.0294789
Check.in.Service 0.0156018 0.0516693
Online.Boarding 0.0995493 0.3296829
Gate.Location 0.0010319 0.0034173
On.board.Service 0.0245334 0.0812487
Seat.Comfort 0.0252866 0.0837430
Leg.Room.Service 0.0297378 0.0984842
Cleanliness 0.0155009 0.0513351
Food.and.Drink 0.0067804 0.0224549
In.flight.Service 0.0096491 0.0319553
In.flight.Wifi.Service 0.0213633 0.0707499
In.flight.Entertainment 0.0252480 0.0836153
Baggage.Handling 0.0101909 0.0337497

Audience cuts

Let’s run models for customer type - First-time and Returning

# RENAME audience VAR TO generic "audience"

Total.data.cuts<- rename(Total.data, audience=Customer.Type)

# store labels from character variable
audience.labels<- unique(Total.data.cuts$audience)

# convert character variable to factor
Total.data.cuts$audience<- Total.data.cuts$audience %>% 
  as_factor() %>% 
  as.numeric()
  
cuts <- Total.data.cuts %>% select(audience) %>% distinct() %>% pull() %>% sort()

# run loop
for (x in cuts) {

  # create covariance matrix
  audience_res <- Total.data.cuts
  audience_res <- filter(audience_res, audience == x)
  audience_res <- audience_res[c("DVAR","Departure.and.Arrival.Time.Convenience","Ease.of.Online.Booking","Check.in.Service","Online.Boarding",
                                 "Gate.Location","On.board.Service","Seat.Comfort","Leg.Room.Service","Cleanliness","Food.and.Drink",
                                 "In.flight.Service","In.flight.Wifi.Service","In.flight.Entertainment","Baggage.Handling")]
  # ensure all variables are numeric
  audience_res<- audience_res %>% mutate_if(is.character, as.numeric)
  audience_res <- cor(audience_res,use = "complete.obs")
  audience_res <- round(audience_res,digits = 4)

  # set predictors and response
  audience_ys<- as.numeric(which(colnames(audience_res)=="DVAR"))
  audience_xs<- as.numeric(which(colnames(audience_res)=="DVAR")+1):ncol(audience_res)

  if (x == 1) {
    # for first iteration, create results dataframe
    audience_results <- relWt(audience_res, audience_ys, audience_xs)
    audience_results_eps <- audience_results[["eps"]]
    audience_results_final_RWA <- cbind(Attributes = rownames(audience_results_eps), audience_results_eps) %>%
      rename(!!paste0(audience.labels[x]) := EPS)
    rownames(audience_results_final_RWA) <- 1:nrow(audience_results_eps)
    # rescale Everything
    Rescaled<- sweep(audience_results_final_RWA[paste0(audience.labels[x])], 1, colSums(audience_results_final_RWA[paste0(audience.labels[x])]),"/")
    Rescaled<- Rescaled %>% rename(!!paste0("Rescaled_",audience.labels[x]) := audience.labels[x])
    audience_results_final_RWA<- cbind(audience_results_final_RWA, Rescaled)
    
  } else {
    # for following iterations, just add results column for audience
    audience_results <- relWt(audience_res, audience_ys, audience_xs)
    audience_results_eps <- audience_results[["eps"]] %>%
      rename(!!paste0(audience.labels[x]) := EPS)
    audience_results_final_RWA <- cbind(audience_results_final_RWA, audience_results_eps)
    # rescale Everything
    Rescaled<- sweep(audience_results_final_RWA[paste0(audience.labels[x])], 1, colSums(audience_results_final_RWA[paste0(audience.labels[x])]),"/")
    Rescaled<- Rescaled %>% rename(!!paste0("Rescaled_",audience.labels[x]) := audience.labels[x])
    audience_results_final_RWA<- cbind(audience_results_final_RWA, Rescaled)
  }
}
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# results are ratio-scaled to sum to 100%
kable(audience_results_final_RWA, caption = "Ratio-scaled results for customer type")
Ratio-scaled results for customer type
Attributes First-time Rescaled_First-time Returning Rescaled_Returning
Departure.and.Arrival.Time.Convenience 0.0023367 0.0133100 0.0222954 0.0665209
Ease.of.Online.Booking 0.0366404 0.2087082 0.0063041 0.0188090
Check.in.Service 0.0161606 0.0920528 0.0152970 0.0456403
Online.Boarding 0.0311006 0.1771529 0.0888067 0.2649646
Gate.Location 0.0028289 0.0161135 0.0033788 0.0100810
On.board.Service 0.0135534 0.0772017 0.0249313 0.0743854
Seat.Comfort 0.0001362 0.0007757 0.0326329 0.0973639
Leg.Room.Service 0.0021022 0.0119744 0.0368760 0.1100236
Cleanliness 0.0004076 0.0023220 0.0227108 0.0677600
Food.and.Drink 0.0004985 0.0028398 0.0087306 0.0260488
In.flight.Service 0.0109950 0.0626286 0.0106649 0.0318200
In.flight.Wifi.Service 0.0467648 0.2663781 0.0178001 0.0531085
In.flight.Entertainment 0.0003608 0.0020551 0.0335170 0.1000018
Baggage.Handling 0.0116724 0.0664872 0.0112187 0.0334721

Export the results to Excel workbook

# combine the results from total sample and customer type
results_RWA<- cbind(audience_results_final_RWA, results_final_RWA)

# let's take a look at the final data table we'll export to Excel
kable(results_RWA, caption = "Final RWA results")
Final RWA results
Attributes First-time Rescaled_First-time Returning Rescaled_Returning Total Rescaled_Total
Departure.and.Arrival.Time.Convenience 0.0023367 0.0133100 0.0222954 0.0665209 0.0085802 0.0284155
Ease.of.Online.Booking 0.0366404 0.2087082 0.0063041 0.0188090 0.0089013 0.0294789
Check.in.Service 0.0161606 0.0920528 0.0152970 0.0456403 0.0156018 0.0516693
Online.Boarding 0.0311006 0.1771529 0.0888067 0.2649646 0.0995493 0.3296829
Gate.Location 0.0028289 0.0161135 0.0033788 0.0100810 0.0010319 0.0034173
On.board.Service 0.0135534 0.0772017 0.0249313 0.0743854 0.0245334 0.0812487
Seat.Comfort 0.0001362 0.0007757 0.0326329 0.0973639 0.0252866 0.0837430
Leg.Room.Service 0.0021022 0.0119744 0.0368760 0.1100236 0.0297378 0.0984842
Cleanliness 0.0004076 0.0023220 0.0227108 0.0677600 0.0155009 0.0513351
Food.and.Drink 0.0004985 0.0028398 0.0087306 0.0260488 0.0067804 0.0224549
In.flight.Service 0.0109950 0.0626286 0.0106649 0.0318200 0.0096491 0.0319553
In.flight.Wifi.Service 0.0467648 0.2663781 0.0178001 0.0531085 0.0213633 0.0707499
In.flight.Entertainment 0.0003608 0.0020551 0.0335170 0.1000018 0.0252480 0.0836153
Baggage.Handling 0.0116724 0.0664872 0.0112187 0.0334721 0.0101909 0.0337497
# SAVE OUTPUTS
wb <- createWorkbook()
addWorksheet(wb,"RWA")
writeData(wb,sheet = "RWA",results_RWA, rowNames = FALSE)

############################

# SAVE FILE
saveWorkbook(wb, "g:/Portfolio Projects/RWA/RWA Results v1_.xlsx", overwrite = TRUE)

Visualizations

Let’s make some charts to visualize our key drivers!

# create plots

colnames(results_RWA)
## [1] "Attributes"          "First-time"          "Rescaled_First-time"
## [4] "Returning"           "Rescaled_Returning"  "Total"              
## [7] "Rescaled_Total"
# replace "-" in colnames
names(results_RWA)<- gsub("\\-", "_", names(results_RWA))

# verify updated names
colnames(results_RWA)
## [1] "Attributes"          "First_time"          "Rescaled_First_time"
## [4] "Returning"           "Rescaled_Returning"  "Total"              
## [7] "Rescaled_Total"
# First_time
# sort df
sorted_Rescaled_First_time<- results_RWA[order(-results_RWA$Rescaled_First_time),]

sorted_Rescaled_First_time$Attributes <- factor(sorted_Rescaled_First_time$Attributes,
                                           levels = sorted_Rescaled_First_time$Attributes[order(sorted_Rescaled_First_time$Rescaled_First_time)])

# create the Rescaled_First_time plot
Rescaled_First_time_p <- ggplot(data = sorted_Rescaled_First_time, aes(x = Attributes, y = Rescaled_First_time)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  theme_minimal() +
  labs(x = "Attributes", y = "Rescaled_First_time", title = "Bar Chart of Rescaled_First_time") +
  scale_y_continuous(labels = scales::percent_format(scale = 100)) +
  coord_flip()

Rescaled_First_time_p

# save to .png
ggsave("Rescaled_First_time_p.png", width = 40, height = 20, units = "cm")

# Returning
# sort df
sorted_Rescaled_Returning<- results_RWA[order(-results_RWA$Rescaled_Returning),]

sorted_Rescaled_Returning$Attributes <- factor(sorted_Rescaled_Returning$Attributes,
                                           levels = sorted_Rescaled_Returning$Attributes[order(sorted_Rescaled_Returning$Rescaled_Returning)])

# create the Rescaled_Returning plot
Rescaled_Returning_p <- ggplot(data = sorted_Rescaled_Returning, aes(x = Attributes, y = Rescaled_Returning)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  theme_minimal() +
  labs(x = "Attributes", y = "Rescaled_Returning", title = "Bar Chart of Rescaled_Returning") +
  scale_y_continuous(labels = scales::percent_format(scale = 100)) +
  coord_flip()

Rescaled_Returning_p

# save to .png
ggsave("Rescaled_Returning_p.png", width = 40, height = 20, units = "cm")

# Total
# sort df
sorted_Rescaled_Total<- results_RWA[order(-results_RWA$Rescaled_Total),]

sorted_Rescaled_Total$Attributes <- factor(sorted_Rescaled_Total$Attributes,
                                           levels = sorted_Rescaled_Total$Attributes[order(sorted_Rescaled_Total$Rescaled_Total)])

# create the Total plot
Total_p <- ggplot(data = sorted_Rescaled_Total, aes(x = Attributes, y = Rescaled_Total)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  theme_minimal() +
  labs(x = "Attributes", y = "Rescaled Total", title = "Bar Chart of Rescaled Total") +
  scale_y_continuous(labels = scales::percent_format(scale = 100)) +
  coord_flip()

Total_p

# save to .png
ggsave("Total_p.png", width = 40, height = 20, units = "cm")

That concludes the RWA!

Let’s consider some of the insights we’ve gained from this analysis.

For first-time customers In-flight Wi-Fi Service, Ease of Online Booking, and Online Boarding are the top 3 factors driving their overall satisfaction. For returning customer Online Boarding, Leg Room Service, and In-flight Entertainment are the top 3 factors driving their overall satisfaction.

Equipped with this knowledge the airline can adjust their services to efficiently maximize customer satisfaction and ideally keep their customers coming back.

In a competitive landscape the airline could also collect the same information for their top competitors and we can model their data as well. We can see where competitors are leading or lagging in the attributes and the airline can adjust services to gain new customers.