## 'data.frame':    103904 obs. of  25 variables:
##  $ X                                : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ id                               : int  70172 5047 110028 24026 119299 111157 82113 96462 79485 65725 ...
##  $ Gender                           : chr  "Male" "Male" "Female" "Female" ...
##  $ Customer.Type                    : chr  "Loyal Customer" "disloyal Customer" "Loyal Customer" "Loyal Customer" ...
##  $ Age                              : int  13 25 26 25 61 26 47 52 41 20 ...
##  $ Type.of.Travel                   : chr  "Personal Travel" "Business travel" "Business travel" "Business travel" ...
##  $ Class                            : chr  "Eco Plus" "Business" "Business" "Business" ...
##  $ Flight.Distance                  : int  460 235 1142 562 214 1180 1276 2035 853 1061 ...
##  $ Inflight.wifi.service            : int  3 3 2 2 3 3 2 4 1 3 ...
##  $ Departure.Arrival.time.convenient: int  4 2 2 5 3 4 4 3 2 3 ...
##  $ Ease.of.Online.booking           : int  3 3 2 5 3 2 2 4 2 3 ...
##  $ Gate.location                    : int  1 3 2 5 3 1 3 4 2 4 ...
##  $ Food.and.drink                   : int  5 1 5 2 4 1 2 5 4 2 ...
##  $ Online.boarding                  : int  3 3 5 2 5 2 2 5 3 3 ...
##  $ Seat.comfort                     : int  5 1 5 2 5 1 2 5 3 3 ...
##  $ Inflight.entertainment           : int  5 1 5 2 3 1 2 5 1 2 ...
##  $ On.board.service                 : int  4 1 4 2 3 3 3 5 1 2 ...
##  $ Leg.room.service                 : int  3 5 3 5 4 4 3 5 2 3 ...
##  $ Baggage.handling                 : int  4 3 4 3 4 4 4 5 1 4 ...
##  $ Checkin.service                  : int  4 1 4 1 3 4 3 4 4 4 ...
##  $ Inflight.service                 : int  5 4 4 4 3 4 5 5 1 3 ...
##  $ Cleanliness                      : int  5 1 5 2 3 1 2 4 2 2 ...
##  $ Departure.Delay.in.Minutes       : int  25 1 0 11 0 0 9 4 0 0 ...
##  $ Arrival.Delay.in.Minutes         : num  18 6 0 9 0 0 23 0 0 0 ...
##  $ satisfaction                     : chr  "neutral or dissatisfied" "neutral or dissatisfied" "satisfied" "neutral or dissatisfied" ...
##        X                id            Gender          Customer.Type     
##  Min.   :     0   Min.   :     1   Length:103904      Length:103904     
##  1st Qu.: 25976   1st Qu.: 32534   Class :character   Class :character  
##  Median : 51952   Median : 64857   Mode  :character   Mode  :character  
##  Mean   : 51952   Mean   : 64924                                        
##  3rd Qu.: 77927   3rd Qu.: 97368                                        
##  Max.   :103903   Max.   :129880                                        
##                                                                         
##       Age        Type.of.Travel        Class           Flight.Distance
##  Min.   : 7.00   Length:103904      Length:103904      Min.   :  31   
##  1st Qu.:27.00   Class :character   Class :character   1st Qu.: 414   
##  Median :40.00   Mode  :character   Mode  :character   Median : 843   
##  Mean   :39.38                                         Mean   :1189   
##  3rd Qu.:51.00                                         3rd Qu.:1743   
##  Max.   :85.00                                         Max.   :4983   
##                                                                       
##  Inflight.wifi.service Departure.Arrival.time.convenient Ease.of.Online.booking
##  Min.   :0.00          Min.   :0.00                      Min.   :0.000         
##  1st Qu.:2.00          1st Qu.:2.00                      1st Qu.:2.000         
##  Median :3.00          Median :3.00                      Median :3.000         
##  Mean   :2.73          Mean   :3.06                      Mean   :2.757         
##  3rd Qu.:4.00          3rd Qu.:4.00                      3rd Qu.:4.000         
##  Max.   :5.00          Max.   :5.00                      Max.   :5.000         
##                                                                                
##  Gate.location   Food.and.drink  Online.boarding  Seat.comfort  
##  Min.   :0.000   Min.   :0.000   Min.   :0.00    Min.   :0.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.00    1st Qu.:2.000  
##  Median :3.000   Median :3.000   Median :3.00    Median :4.000  
##  Mean   :2.977   Mean   :3.202   Mean   :3.25    Mean   :3.439  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.00    3rd Qu.:5.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.00    Max.   :5.000  
##                                                                 
##  Inflight.entertainment On.board.service Leg.room.service Baggage.handling
##  Min.   :0.000          Min.   :0.000    Min.   :0.000    Min.   :1.000   
##  1st Qu.:2.000          1st Qu.:2.000    1st Qu.:2.000    1st Qu.:3.000   
##  Median :4.000          Median :4.000    Median :4.000    Median :4.000   
##  Mean   :3.358          Mean   :3.382    Mean   :3.351    Mean   :3.632   
##  3rd Qu.:4.000          3rd Qu.:4.000    3rd Qu.:4.000    3rd Qu.:5.000   
##  Max.   :5.000          Max.   :5.000    Max.   :5.000    Max.   :5.000   
##                                                                           
##  Checkin.service Inflight.service  Cleanliness    Departure.Delay.in.Minutes
##  Min.   :0.000   Min.   :0.00     Min.   :0.000   Min.   :   0.00           
##  1st Qu.:3.000   1st Qu.:3.00     1st Qu.:2.000   1st Qu.:   0.00           
##  Median :3.000   Median :4.00     Median :3.000   Median :   0.00           
##  Mean   :3.304   Mean   :3.64     Mean   :3.286   Mean   :  14.82           
##  3rd Qu.:4.000   3rd Qu.:5.00     3rd Qu.:4.000   3rd Qu.:  12.00           
##  Max.   :5.000   Max.   :5.00     Max.   :5.000   Max.   :1592.00           
##                                                                             
##  Arrival.Delay.in.Minutes satisfaction      
##  Min.   :   0.00          Length:103904     
##  1st Qu.:   0.00          Class :character  
##  Median :   0.00          Mode  :character  
##  Mean   :  15.18                            
##  3rd Qu.:  13.00                            
##  Max.   :1584.00                            
##  NA's   :310
##   X     id Gender     Customer.Type Age  Type.of.Travel    Class
## 1 0  70172   Male    Loyal Customer  13 Personal Travel Eco Plus
## 2 1   5047   Male disloyal Customer  25 Business travel Business
## 3 2 110028 Female    Loyal Customer  26 Business travel Business
## 4 3  24026 Female    Loyal Customer  25 Business travel Business
## 5 4 119299   Male    Loyal Customer  61 Business travel Business
## 6 5 111157 Female    Loyal Customer  26 Personal Travel      Eco
##   Flight.Distance Inflight.wifi.service Departure.Arrival.time.convenient
## 1             460                     3                                 4
## 2             235                     3                                 2
## 3            1142                     2                                 2
## 4             562                     2                                 5
## 5             214                     3                                 3
## 6            1180                     3                                 4
##   Ease.of.Online.booking Gate.location Food.and.drink Online.boarding
## 1                      3             1              5               3
## 2                      3             3              1               3
## 3                      2             2              5               5
## 4                      5             5              2               2
## 5                      3             3              4               5
## 6                      2             1              1               2
##   Seat.comfort Inflight.entertainment On.board.service Leg.room.service
## 1            5                      5                4                3
## 2            1                      1                1                5
## 3            5                      5                4                3
## 4            2                      2                2                5
## 5            5                      3                3                4
## 6            1                      1                3                4
##   Baggage.handling Checkin.service Inflight.service Cleanliness
## 1                4               4                5           5
## 2                3               1                4           1
## 3                4               4                4           5
## 4                3               1                4           2
## 5                4               3                3           3
## 6                4               4                4           1
##   Departure.Delay.in.Minutes Arrival.Delay.in.Minutes            satisfaction
## 1                         25                       18 neutral or dissatisfied
## 2                          1                        6 neutral or dissatisfied
## 3                          0                        0               satisfied
## 4                         11                        9 neutral or dissatisfied
## 5                          0                        0               satisfied
## 6                          0                        0 neutral or dissatisfied

Preprocessing

## Drop every row that contains a missing value in any column
data1 <- na.omit(data)
dim(data1)
## [1] 103594     25
glimpse(data1)
## Rows: 103,594
## Columns: 25
## $ X                                 <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ id                                <int> 70172, 5047, 110028, 24026, 119299, …
## $ Gender                            <chr> "Male", "Male", "Female", "Female", …
## $ Customer.Type                     <chr> "Loyal Customer", "disloyal Customer…
## $ Age                               <int> 13, 25, 26, 25, 61, 26, 47, 52, 41, …
## $ Type.of.Travel                    <chr> "Personal Travel", "Business travel"…
## $ Class                             <chr> "Eco Plus", "Business", "Business", …
## $ Flight.Distance                   <int> 460, 235, 1142, 562, 214, 1180, 1276…
## $ Inflight.wifi.service             <int> 3, 3, 2, 2, 3, 3, 2, 4, 1, 3, 4, 2, …
## $ Departure.Arrival.time.convenient <int> 4, 2, 2, 5, 3, 4, 4, 3, 2, 3, 5, 4, …
## $ Ease.of.Online.booking            <int> 3, 3, 2, 5, 3, 2, 2, 4, 2, 3, 5, 2, …
## $ Gate.location                     <int> 1, 3, 2, 5, 3, 1, 3, 4, 2, 4, 4, 2, …
## $ Food.and.drink                    <int> 5, 1, 5, 2, 4, 1, 2, 5, 4, 2, 2, 1, …
## $ Online.boarding                   <int> 3, 3, 5, 2, 5, 2, 2, 5, 3, 3, 5, 2, …
## $ Seat.comfort                      <int> 5, 1, 5, 2, 5, 1, 2, 5, 3, 3, 2, 1, …
## $ Inflight.entertainment            <int> 5, 1, 5, 2, 3, 1, 2, 5, 1, 2, 2, 1, …
## $ On.board.service                  <int> 4, 1, 4, 2, 3, 3, 3, 5, 1, 2, 3, 1, …
## $ Leg.room.service                  <int> 3, 5, 3, 5, 4, 4, 3, 5, 2, 3, 3, 2, …
## $ Baggage.handling                  <int> 4, 3, 4, 3, 4, 4, 4, 5, 1, 4, 5, 5, …
## $ Checkin.service                   <int> 4, 1, 4, 1, 3, 4, 3, 4, 4, 4, 3, 5, …
## $ Inflight.service                  <int> 5, 4, 4, 4, 3, 4, 5, 5, 1, 3, 5, 5, …
## $ Cleanliness                       <int> 5, 1, 5, 2, 3, 1, 2, 4, 2, 2, 2, 1, …
## $ Departure.Delay.in.Minutes        <int> 25, 1, 0, 11, 0, 0, 9, 4, 0, 0, 0, 0…
## $ Arrival.Delay.in.Minutes          <dbl> 18, 6, 0, 9, 0, 0, 23, 0, 0, 0, 0, 0…
## $ satisfaction                      <chr> "neutral or dissatisfied", "neutral …
summary(data1)
##        X                id            Gender          Customer.Type     
##  Min.   :     0   Min.   :     1   Length:103594      Length:103594     
##  1st Qu.: 25960   1st Qu.: 32562   Class :character   Class :character  
##  Median : 51956   Median : 64890   Mode  :character   Mode  :character  
##  Mean   : 51950   Mean   : 64942                                        
##  3rd Qu.: 77925   3rd Qu.: 97371                                        
##  Max.   :103903   Max.   :129880                                        
##       Age        Type.of.Travel        Class           Flight.Distance
##  Min.   : 7.00   Length:103594      Length:103594      Min.   :  31   
##  1st Qu.:27.00   Class :character   Class :character   1st Qu.: 414   
##  Median :40.00   Mode  :character   Mode  :character   Median : 842   
##  Mean   :39.38                                         Mean   :1189   
##  3rd Qu.:51.00                                         3rd Qu.:1743   
##  Max.   :85.00                                         Max.   :4983   
##  Inflight.wifi.service Departure.Arrival.time.convenient Ease.of.Online.booking
##  Min.   :0.00          Min.   :0.00                      Min.   :0.000         
##  1st Qu.:2.00          1st Qu.:2.00                      1st Qu.:2.000         
##  Median :3.00          Median :3.00                      Median :3.000         
##  Mean   :2.73          Mean   :3.06                      Mean   :2.757         
##  3rd Qu.:4.00          3rd Qu.:4.00                      3rd Qu.:4.000         
##  Max.   :5.00          Max.   :5.00                      Max.   :5.000         
##  Gate.location   Food.and.drink  Online.boarding  Seat.comfort 
##  Min.   :0.000   Min.   :0.000   Min.   :0.00    Min.   :0.00  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.00    1st Qu.:2.00  
##  Median :3.000   Median :3.000   Median :3.00    Median :4.00  
##  Mean   :2.977   Mean   :3.202   Mean   :3.25    Mean   :3.44  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.00    3rd Qu.:5.00  
##  Max.   :5.000   Max.   :5.000   Max.   :5.00    Max.   :5.00  
##  Inflight.entertainment On.board.service Leg.room.service Baggage.handling
##  Min.   :0.000          Min.   :0.000    Min.   :0.000    Min.   :1.000   
##  1st Qu.:2.000          1st Qu.:2.000    1st Qu.:2.000    1st Qu.:3.000   
##  Median :4.000          Median :4.000    Median :4.000    Median :4.000   
##  Mean   :3.358          Mean   :3.383    Mean   :3.351    Mean   :3.632   
##  3rd Qu.:4.000          3rd Qu.:4.000    3rd Qu.:4.000    3rd Qu.:5.000   
##  Max.   :5.000          Max.   :5.000    Max.   :5.000    Max.   :5.000   
##  Checkin.service Inflight.service  Cleanliness    Departure.Delay.in.Minutes
##  Min.   :0.000   Min.   :0.000    Min.   :0.000   Min.   :   0.00           
##  1st Qu.:3.000   1st Qu.:3.000    1st Qu.:2.000   1st Qu.:   0.00           
##  Median :3.000   Median :4.000    Median :3.000   Median :   0.00           
##  Mean   :3.304   Mean   :3.641    Mean   :3.286   Mean   :  14.75           
##  3rd Qu.:4.000   3rd Qu.:5.000    3rd Qu.:4.000   3rd Qu.:  12.00           
##  Max.   :5.000   Max.   :5.000    Max.   :5.000   Max.   :1592.00           
##  Arrival.Delay.in.Minutes satisfaction      
##  Min.   :   0.00          Length:103594     
##  1st Qu.:   0.00          Class :character  
##  Median :   0.00          Mode  :character  
##  Mean   :  15.18                            
##  3rd Qu.:  13.00                            
##  Max.   :1584.00
## Convert the categorical columns to ordered factors with explicit levels.
## NOTE(review): the levels are ranked in the order listed. For Gender,
## Customer.Type, Type.of.Travel and Class ("Eco Plus" < "Business" < "Eco")
## that ranking looks arbitrary -- confirm whether unordered factors were meant.

data1[["satisfaction"]] <- factor(
  data1[["satisfaction"]],
  levels = c("satisfied", "neutral or dissatisfied"),
  ordered = TRUE
)
data1[["Gender"]] <- factor(
  data1[["Gender"]],
  levels = c("Male", "Female"),
  ordered = TRUE
)
data1[["Customer.Type"]] <- factor(
  data1[["Customer.Type"]],
  levels = c("Loyal Customer", "disloyal Customer"),
  ordered = TRUE
)
data1[["Class"]] <- factor(
  data1[["Class"]],
  levels = c("Eco Plus", "Business", "Eco"),
  ordered = TRUE
)
data1[["Type.of.Travel"]] <- factor(
  data1[["Type.of.Travel"]],
  levels = c("Personal Travel", "Business travel"),
  ordered = TRUE
)

summary(data1)
##        X                id            Gender                Customer.Type  
##  Min.   :     0   Min.   :     1   Male  :51018   Loyal Customer   :84662  
##  1st Qu.: 25960   1st Qu.: 32562   Female:52576   disloyal Customer:18932  
##  Median : 51956   Median : 64890                                           
##  Mean   : 51950   Mean   : 64942                                           
##  3rd Qu.: 77925   3rd Qu.: 97371                                           
##  Max.   :103903   Max.   :129880                                           
##       Age                Type.of.Travel       Class       Flight.Distance
##  Min.   : 7.00   Personal Travel:32129   Eco Plus: 7468   Min.   :  31   
##  1st Qu.:27.00   Business travel:71465   Business:49533   1st Qu.: 414   
##  Median :40.00                           Eco     :46593   Median : 842   
##  Mean   :39.38                                            Mean   :1189   
##  3rd Qu.:51.00                                            3rd Qu.:1743   
##  Max.   :85.00                                            Max.   :4983   
##  Inflight.wifi.service Departure.Arrival.time.convenient Ease.of.Online.booking
##  Min.   :0.00          Min.   :0.00                      Min.   :0.000         
##  1st Qu.:2.00          1st Qu.:2.00                      1st Qu.:2.000         
##  Median :3.00          Median :3.00                      Median :3.000         
##  Mean   :2.73          Mean   :3.06                      Mean   :2.757         
##  3rd Qu.:4.00          3rd Qu.:4.00                      3rd Qu.:4.000         
##  Max.   :5.00          Max.   :5.00                      Max.   :5.000         
##  Gate.location   Food.and.drink  Online.boarding  Seat.comfort 
##  Min.   :0.000   Min.   :0.000   Min.   :0.00    Min.   :0.00  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.00    1st Qu.:2.00  
##  Median :3.000   Median :3.000   Median :3.00    Median :4.00  
##  Mean   :2.977   Mean   :3.202   Mean   :3.25    Mean   :3.44  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.00    3rd Qu.:5.00  
##  Max.   :5.000   Max.   :5.000   Max.   :5.00    Max.   :5.00  
##  Inflight.entertainment On.board.service Leg.room.service Baggage.handling
##  Min.   :0.000          Min.   :0.000    Min.   :0.000    Min.   :1.000   
##  1st Qu.:2.000          1st Qu.:2.000    1st Qu.:2.000    1st Qu.:3.000   
##  Median :4.000          Median :4.000    Median :4.000    Median :4.000   
##  Mean   :3.358          Mean   :3.383    Mean   :3.351    Mean   :3.632   
##  3rd Qu.:4.000          3rd Qu.:4.000    3rd Qu.:4.000    3rd Qu.:5.000   
##  Max.   :5.000          Max.   :5.000    Max.   :5.000    Max.   :5.000   
##  Checkin.service Inflight.service  Cleanliness    Departure.Delay.in.Minutes
##  Min.   :0.000   Min.   :0.000    Min.   :0.000   Min.   :   0.00           
##  1st Qu.:3.000   1st Qu.:3.000    1st Qu.:2.000   1st Qu.:   0.00           
##  Median :3.000   Median :4.000    Median :3.000   Median :   0.00           
##  Mean   :3.304   Mean   :3.641    Mean   :3.286   Mean   :  14.75           
##  3rd Qu.:4.000   3rd Qu.:5.000    3rd Qu.:4.000   3rd Qu.:  12.00           
##  Max.   :5.000   Max.   :5.000    Max.   :5.000   Max.   :1592.00           
##  Arrival.Delay.in.Minutes                  satisfaction  
##  Min.   :   0.00          satisfied              :44897  
##  1st Qu.:   0.00          neutral or dissatisfied:58697  
##  Median :   0.00                                         
##  Mean   :  15.18                                         
##  3rd Qu.:  13.00                                         
##  Max.   :1584.00
str(data1)
## 'data.frame':    103594 obs. of  25 variables:
##  $ X                                : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ id                               : int  70172 5047 110028 24026 119299 111157 82113 96462 79485 65725 ...
##  $ Gender                           : Ord.factor w/ 2 levels "Male"<"Female": 1 1 2 2 1 2 1 2 2 1 ...
##  $ Customer.Type                    : Ord.factor w/ 2 levels "Loyal Customer"<..: 1 2 1 1 1 1 1 1 1 2 ...
##  $ Age                              : int  13 25 26 25 61 26 47 52 41 20 ...
##  $ Type.of.Travel                   : Ord.factor w/ 2 levels "Personal Travel"<..: 1 2 2 2 2 1 1 2 2 2 ...
##  $ Class                            : Ord.factor w/ 3 levels "Eco Plus"<"Business"<..: 1 2 2 2 2 3 3 2 2 3 ...
##  $ Flight.Distance                  : int  460 235 1142 562 214 1180 1276 2035 853 1061 ...
##  $ Inflight.wifi.service            : int  3 3 2 2 3 3 2 4 1 3 ...
##  $ Departure.Arrival.time.convenient: int  4 2 2 5 3 4 4 3 2 3 ...
##  $ Ease.of.Online.booking           : int  3 3 2 5 3 2 2 4 2 3 ...
##  $ Gate.location                    : int  1 3 2 5 3 1 3 4 2 4 ...
##  $ Food.and.drink                   : int  5 1 5 2 4 1 2 5 4 2 ...
##  $ Online.boarding                  : int  3 3 5 2 5 2 2 5 3 3 ...
##  $ Seat.comfort                     : int  5 1 5 2 5 1 2 5 3 3 ...
##  $ Inflight.entertainment           : int  5 1 5 2 3 1 2 5 1 2 ...
##  $ On.board.service                 : int  4 1 4 2 3 3 3 5 1 2 ...
##  $ Leg.room.service                 : int  3 5 3 5 4 4 3 5 2 3 ...
##  $ Baggage.handling                 : int  4 3 4 3 4 4 4 5 1 4 ...
##  $ Checkin.service                  : int  4 1 4 1 3 4 3 4 4 4 ...
##  $ Inflight.service                 : int  5 4 4 4 3 4 5 5 1 3 ...
##  $ Cleanliness                      : int  5 1 5 2 3 1 2 4 2 2 ...
##  $ Departure.Delay.in.Minutes       : int  25 1 0 11 0 0 9 4 0 0 ...
##  $ Arrival.Delay.in.Minutes         : num  18 6 0 9 0 0 23 0 0 0 ...
##  $ satisfaction                     : Ord.factor w/ 2 levels "satisfied"<"neutral or dissatisfied": 2 2 1 2 1 2 2 1 2 2 ...
##  - attr(*, "na.action")= 'omit' Named int [1:310] 214 1125 1530 2005 2109 2486 2631 3622 4042 4491 ...
##   ..- attr(*, "names")= chr [1:310] "214" "1125" "1530" "2005" ...

#EDA

#categorical variables

# Bar charts of the level counts of every factor column, one facet per column.
# The factors are turned into character vectors before reshaping because their
# level sets differ and cannot share one factor column. geom_bar() counts rows
# per level itself and has no `bins` parameter, so none is passed.
# dplyr::select is namespaced because MASS (source of loglm) also exports select.
data1 %>%
  dplyr::select(where(is.factor)) %>%
  mutate(across(everything(), as.character)) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  geom_bar(fill = "firebrick") +
  facet_wrap(~key, scales = "free_x") +
  labs(x = "Categorical", y = "Value")

#numerical variables

# Histograms (10 bins each) of every numeric column, one facet per column.
# NOTE(review): this includes X and id, which look like a row index and an
# identifier rather than measurements -- confirm they should be plotted.
# dplyr::select is namespaced because MASS (source of loglm) also exports select.
data1 %>%
  dplyr::select(where(is.numeric)) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  geom_histogram(bins = 10, fill = "turquoise4") +
  facet_wrap(~key, scales = "free_x")

#library(corrplot)
#as.numeric(data1)
#M = cor(data1)
#corrplot(M, method = 'number')

Chi-squared tests and contingency tables

##                          Seat.comfort
## satisfaction                  0     1     2     3     4     5
##   satisfied                   0  2690  3330  3944 17775 17158
##   neutral or dissatisfied     1  9341 11516 14697 13907  9235
##                          Cleanliness
## satisfaction                  0     1     2     3     4     5
##   satisfied                   0  2607  3428 10581 14513 13768
##   neutral or dissatisfied    12 10669 12653 13925 12587  8851
##                          Inflight.service
## satisfaction                  0     1     2     3     4     5
##   satisfied                   0  2046  3439  4833 18082 16497
##   neutral or dissatisfied     3  5017  7975 15394 19764 10544
##                          Inflight.entertainment
## satisfaction                  0     1     2     3     4     5
##   satisfied                   0  1740  3750  5146 17954 16307
##   neutral or dissatisfied    14 10701 13829 13934 11381  8838
##                          Checkin.service
## satisfaction                  0     1     2     3     4     5
##   satisfied                   0  3076  3248 12717 13324 12532
##   neutral or dissatisfied     1  9776  9606 15639 15651  8024
##                          Gate.location
## satisfaction                  0     1     2     3     4     5
##   satisfied                   1  8677  8949  9889  9458  7923
##   neutral or dissatisfied     0  8834 10447 18600 14895  5921
##                          Baggage.handling
## satisfaction                  1     2     3     4     5
##   satisfied                2148  3397  4848 17929 16575
##   neutral or dissatisfied  5075  8086 15719 19345 10472
##                          Departure.Arrival.time.convenient
## satisfaction                  0     1     2     3     4     5
##   satisfied                2516  7519  7639  7847  9879  9497
##   neutral or dissatisfied  2774  7933  9503 10056 15595 12836
##                    X^2 df P(> X^2)
## Likelihood Ratio 16317  5        0
## Pearson          15685  5        0
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.363 
## Cramer's V        : 0.389

Mosaic Display

#(inflight1 <- xtabs(~satisfaction + Seat.comfort, data=data1))
df1<-table(data1$satisfaction, data1$Seat.comfort)
df1
##                          
##                               0     1     2     3     4     5
##   satisfied                   0  2690  3330  3944 17775 17158
##   neutral or dissatisfied     1  9341 11516 14697 13907  9235
# Keep the two columns of interest, cross-tabulate them, and fit the
# log-linear model with main effects only (no interaction term).
a <- data1[c("satisfaction", "Seat.comfort")]
da <- xtabs(~ satisfaction + Seat.comfort, data = a)
loglm(~ satisfaction + Seat.comfort, data = da)
## Call:
## loglm(formula = ~satisfaction + Seat.comfort, data = da)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 16317.00  5        0
## Pearson          15685.04  5        0
assocstats(df1)
##                    X^2 df P(> X^2)
## Likelihood Ratio 16317  5        0
## Pearson          15685  5        0
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.363 
## Cramer's V        : 0.389
# Mosaic display of satisfaction by Seat.comfort with shading enabled and the
# cell values printed as labels. The labeling option is spelled `suppress`;
# a misspelled name is silently ignored by mosaic().
mosaic(
  ~ satisfaction + Seat.comfort,
  data = a,
  expected = ~ satisfaction + Seat.comfort,
  shade = TRUE,
  labeling = labeling_values,
  suppress = 0,
  rot_labels = c(0, 0, 0, 90)
)

#(inflight2 <- xtabs(~satisfaction + Cleanliness, data=data1))

df2<-table(data1$satisfaction, data1$Cleanliness)
df2
##                          
##                               0     1     2     3     4     5
##   satisfied                   0  2607  3428 10581 14513 13768
##   neutral or dissatisfied    12 10669 12653 13925 12587  8851
# Keep the two columns of interest, cross-tabulate them, and fit the
# log-linear model with main effects only (no interaction term).
b <- data1[c("satisfaction", "Cleanliness")]
db <- xtabs(~ satisfaction + Cleanliness, data = b)
loglm(~ satisfaction + Cleanliness, data = db)
## Call:
## loglm(formula = ~satisfaction + Cleanliness, data = db)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 10726.50  5        0
## Pearson          10204.56  5        0
assocstats(df2)
##                    X^2 df P(> X^2)
## Likelihood Ratio 10726  5        0
## Pearson          10205  5        0
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.299 
## Cramer's V        : 0.314
# Mosaic display of satisfaction by Cleanliness with shading enabled and the
# cell values printed as labels. The labeling option is spelled `suppress`;
# a misspelled name is silently ignored by mosaic().
mosaic(
  ~ satisfaction + Cleanliness,
  data = b,
  expected = ~ satisfaction + Cleanliness,
  shade = TRUE,
  labeling = labeling_values,
  suppress = 0,
  rot_labels = c(0, 0, 0, 90)
)

#(inflight3 <- xtabs(~satisfaction + Inflight.service, data=data1))

df3<-table(data1$satisfaction, data1$Inflight.service)
df3
##                          
##                               0     1     2     3     4     5
##   satisfied                   0  2046  3439  4833 18082 16497
##   neutral or dissatisfied     3  5017  7975 15394 19764 10544
# Keep the two columns of interest, cross-tabulate them, and fit the
# log-linear model with main effects only (no interaction term).
# NOTE(review): the variable name `c` shadows base::c for variable lookup; it
# is kept because the mosaic() call for this table refers to it.
c <- data1[c("satisfaction", "Inflight.service")]
dc <- xtabs(~ satisfaction + Inflight.service, data = c)
loglm(~ satisfaction + Inflight.service, data = dc)
## Call:
## loglm(formula = ~satisfaction + Inflight.service, data = dc)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 8496.132  5        0
## Pearson          8263.110  5        0
assocstats(df3)
##                     X^2 df P(> X^2)
## Likelihood Ratio 8496.1  5        0
## Pearson          8263.1  5        0
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.272 
## Cramer's V        : 0.282
# Mosaic display of satisfaction by Inflight.service with shading enabled and
# the cell values printed as labels. The labeling option is spelled
# `suppress`; a misspelled name is silently ignored by mosaic().
mosaic(
  ~ satisfaction + Inflight.service,
  data = c,
  expected = ~ satisfaction + Inflight.service,
  shade = TRUE,
  labeling = labeling_values,
  suppress = 0,
  rot_labels = c(0, 0, 0, 90)
)

#(inflight4 <- xtabs(~satisfaction + Inflight.entertainment, data=data1))
df4<-table(data1$satisfaction, data1$Inflight.entertainment)
df4
##                          
##                               0     1     2     3     4     5
##   satisfied                   0  1740  3750  5146 17954 16307
##   neutral or dissatisfied    14 10701 13829 13934 11381  8838
# Keep the two columns of interest, cross-tabulate them, and fit the
# log-linear model with main effects only (no interaction term).
d <- data1[c("satisfaction", "Inflight.entertainment")]
dd <- xtabs(~ satisfaction + Inflight.entertainment, data = d)
loglm(~ satisfaction + Inflight.entertainment, data = dd)
## Call:
## loglm(formula = ~satisfaction + Inflight.entertainment, data = dd)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 19441.14  5        0
## Pearson          18475.80  5        0
assocstats(df4)
##                    X^2 df P(> X^2)
## Likelihood Ratio 19441  5        0
## Pearson          18476  5        0
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.389 
## Cramer's V        : 0.422
# Mosaic display of satisfaction by Inflight.entertainment with shading enabled
# and the cell values printed as labels. The labeling option is spelled
# `suppress`; a misspelled name is silently ignored by mosaic().
mosaic(
  ~ satisfaction + Inflight.entertainment,
  data = d,
  expected = ~ satisfaction + Inflight.entertainment,
  shade = TRUE,
  labeling = labeling_values,
  suppress = 0,
  rot_labels = c(0, 0, 0, 90)
)

#(offflight1 <- xtabs(~satisfaction + Checkin.service, data=data1))

df5<-table(data1$satisfaction, data1$Checkin.service)
df5
##                          
##                               0     1     2     3     4     5
##   satisfied                   0  3076  3248 12717 13324 12532
##   neutral or dissatisfied     1  9776  9606 15639 15651  8024
# Keep the two columns of interest, cross-tabulate them, and fit the
# log-linear model with main effects only (no interaction term).
e <- data1[c("satisfaction", "Checkin.service")]
de <- xtabs(~ satisfaction + Checkin.service, data = e)
loglm(~ satisfaction + Checkin.service, data = de)
## Call:
## loglm(formula = ~satisfaction + Checkin.service, data = de)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 6601.884  5        0
## Pearson          6390.388  5        0
assocstats(df5)
##                     X^2 df P(> X^2)
## Likelihood Ratio 6601.9  5        0
## Pearson          6390.4  5        0
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.241 
## Cramer's V        : 0.248
# Mosaic display of satisfaction by Checkin.service with shading enabled and
# the cell values printed as labels. The labeling option is spelled
# `suppress`; a misspelled name is silently ignored by mosaic().
mosaic(
  ~ satisfaction + Checkin.service,
  data = e,
  expected = ~ satisfaction + Checkin.service,
  shade = TRUE,
  labeling = labeling_values,
  suppress = 0,
  rot_labels = c(0, 0, 0, 90)
)

#(offflight2 <- xtabs(~satisfaction + Gate.location, data=data1))

df6<-table(data1$satisfaction, data1$Gate.location)
df6
##                          
##                               0     1     2     3     4     5
##   satisfied                   1  8677  8949  9889  9458  7923
##   neutral or dissatisfied     0  8834 10447 18600 14895  5921
# Keep the two columns of interest, cross-tabulate them, and fit the
# log-linear model with main effects only (no interaction term).
# NOTE(review): the variable name `df` shadows stats::df for variable lookup;
# it is kept so the printed loglm call still reads `data = df`.
f <- data1[c("satisfaction", "Gate.location")]
df <- xtabs(~ satisfaction + Gate.location, data = f)
loglm(~ satisfaction + Gate.location, data = df)
## Call:
## loglm(formula = ~satisfaction + Gate.location, data = df)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 2496.147  5        0
## Pearson          2490.876  5        0
assocstats(df6)
##                     X^2 df P(> X^2)
## Likelihood Ratio 2496.1  5        0
## Pearson          2490.9  5        0
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.153 
## Cramer's V        : 0.155
# Mosaic display of satisfaction by Gate.location with shading enabled and the
# cell values printed as labels. The labeling option is spelled `suppress`;
# a misspelled name is silently ignored by mosaic().
mosaic(
  ~ satisfaction + Gate.location,
  data = f,
  expected = ~ satisfaction + Gate.location,
  shade = TRUE,
  labeling = labeling_values,
  suppress = 0,
  rot_labels = c(0, 0, 0, 90)
)

#(offflight3 <- xtabs(~satisfaction + Baggage.handling, data=data1))

df7<-table(data1$satisfaction, data1$Baggage.handling)
df7
##                          
##                               1     2     3     4     5
##   satisfied                2148  3397  4848 17929 16575
##   neutral or dissatisfied  5075  8086 15719 19345 10472
# Keep the two columns of interest, cross-tabulate them, and fit the
# log-linear model with main effects only (no interaction term).
g <- data1[c("satisfaction", "Baggage.handling")]
dg <- xtabs(~ satisfaction + Baggage.handling, data = g)
loglm(~ satisfaction + Baggage.handling, data = dg)
## Call:
## loglm(formula = ~satisfaction + Baggage.handling, data = dg)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 8840.719  4        0
## Pearson          8591.906  4        0
# Association statistics for the satisfaction x Baggage.handling table; uses
# the table() object df7, matching how the other sections call assocstats().
assocstats(df7)
##                     X^2 df P(> X^2)
## Likelihood Ratio 8840.7  4        0
## Pearson          8591.9  4        0
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.277 
## Cramer's V        : 0.288
# Mosaic display of satisfaction by Baggage.handling with shading enabled and
# the cell values printed as labels. The labeling option is spelled
# `suppress`; a misspelled name is silently ignored by mosaic().
mosaic(
  ~ satisfaction + Baggage.handling,
  data = g,
  expected = ~ satisfaction + Baggage.handling,
  shade = TRUE,
  labeling = labeling_values,
  suppress = 0,
  rot_labels = c(0, 0, 0, 90)
)

#(offflight4 <- xtabs(~satisfaction + Departure.Arrival.time.convenient, data=data1))
df8<-table(data1$satisfaction, data1$Departure.Arrival.time.convenient)
df8
##                          
##                               0     1     2     3     4     5
##   satisfied                2516  7519  7639  7847  9879  9497
##   neutral or dissatisfied  2774  7933  9503 10056 15595 12836
# Keep the two columns of interest, cross-tabulate them, and fit the
# log-linear model with main effects only (no interaction term).
h <- data1[c("satisfaction", "Departure.Arrival.time.convenient")]
dh <- xtabs(~ satisfaction + Departure.Arrival.time.convenient, data = h)
loglm(~ satisfaction + Departure.Arrival.time.convenient, data = dh)
## Call:
## loglm(formula = ~satisfaction + Departure.Arrival.time.convenient, 
##     data = dh)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 450.8870  5        0
## Pearson          450.3896  5        0
assocstats(df8)
##                     X^2 df P(> X^2)
## Likelihood Ratio 450.89  5        0
## Pearson          450.39  5        0
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.066 
## Cramer's V        : 0.066
# Mosaic display of satisfaction by Departure.Arrival.time.convenient with
# shading enabled and the cell values printed as labels. The labeling option
# is spelled `suppress`; a misspelled name is silently ignored by mosaic().
mosaic(
  ~ satisfaction + Departure.Arrival.time.convenient,
  data = h,
  expected = ~ satisfaction + Departure.Arrival.time.convenient,
  shade = TRUE,
  labeling = labeling_values,
  suppress = 0,
  rot_labels = c(0, 0, 0, 90)
)

# Interactions

# Do these factors cause frustration for users?

# Three-way counts: satisfaction (rows) by Inflight.service (columns), with
# one slice per Departure.Arrival.time.convenient value.
df9 <- table(
  data1[["satisfaction"]],
  data1[["Inflight.service"]],
  data1[["Departure.Arrival.time.convenient"]]
)
df9
## , ,  = 0
## 
##                          
##                              0    1    2    3    4    5
##   satisfied                  0  236  284  299  850  847
##   neutral or dissatisfied    0  314  305  444  936  775
## 
## , ,  = 1
## 
##                          
##                              0    1    2    3    4    5
##   satisfied                  0  297  565  800 3133 2724
##   neutral or dissatisfied    0  694 1471 2880 2385  503
## 
## , ,  = 2
## 
##                          
##                              0    1    2    3    4    5
##   satisfied                  0  270  587  854 3155 2773
##   neutral or dissatisfied    0  702 1411 3434 3181  775
## 
## , ,  = 3
## 
##                          
##                              0    1    2    3    4    5
##   satisfied                  0  326  627  857 3109 2928
##   neutral or dissatisfied    1  952 1572 3279 3100 1152
## 
## , ,  = 4
## 
##                          
##                              0    1    2    3    4    5
##   satisfied                  0  429  691 1073 4016 3670
##   neutral or dissatisfied    0 1209 1682 3210 5740 3754
## 
## , ,  = 5
## 
##                          
##                              0    1    2    3    4    5
##   satisfied                  0  488  685  950 3819 3555
##   neutral or dissatisfied    2 1146 1534 2147 4422 3585
# Three-way table of satisfaction, Inflight.service and
# Departure.Arrival.time.convenient, with named dimensions for modelling.
p <- data1[c(
  "satisfaction",
  "Inflight.service",
  "Departure.Arrival.time.convenient"
)]
dp <- xtabs(
  ~ satisfaction + Inflight.service + Departure.Arrival.time.convenient,
  data = p
)
# Main-effects-only log-linear model: all three variables mutually independent.
loglm(
  ~ satisfaction + Inflight.service + Departure.Arrival.time.convenient,
  data = dp
)
## Call:
## loglm(formula = ~satisfaction + Inflight.service + Departure.Arrival.time.convenient, 
##     data = dp)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 13977.01 60        0
## Pearson          12988.15 60        0
# Association measures computed separately for each slice of the third
# dimension. NOTE(review): Pearson X^2 (and the coefficients derived from it)
# come out NaN for slices in which the Inflight.service = 0 column has no
# observations at all.
assocstats(df9)
## $`:0`
##                    X^2 df   P(> X^2)
## Likelihood Ratio 35.08  5 1.4505e-06
## Pearson            NaN  5        NaN
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: NaN 
## Cramer's V        : NaN 
## 
## $`:1`
##                     X^2 df P(> X^2)
## Likelihood Ratio 3600.4  5        0
## Pearson             NaN  5      NaN
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: NaN 
## Cramer's V        : NaN 
## 
## $`:2`
##                     X^2 df P(> X^2)
## Likelihood Ratio 3202.8  5        0
## Pearson             NaN  5      NaN
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: NaN 
## Cramer's V        : NaN 
## 
## $`:3`
##                     X^2 df P(> X^2)
## Likelihood Ratio 2780.7  5        0
## Pearson          2673.3  5        0
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.36 
## Cramer's V        : 0.386 
## 
## $`:4`
##                     X^2 df P(> X^2)
## Likelihood Ratio 942.96  5        0
## Pearson             NaN  5      NaN
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: NaN 
## Cramer's V        : NaN 
## 
## $`:5`
##                     X^2 df P(> X^2)
## Likelihood Ratio 626.79  5        0
## Pearson          613.19  5        0
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.163 
## Cramer's V        : 0.166
# Three-way mosaic with cell counts as labels, compared against the
# mutual-independence model. `suppress` is the correct spelling of the labeling
# argument; the earlier `supress` matched no parameter and was silently
# absorbed by `...`.
mosaic(
  ~ satisfaction + Inflight.service + Departure.Arrival.time.convenient,
  data = p,
  expected = ~ satisfaction + Inflight.service +
    Departure.Arrival.time.convenient,
  shade = TRUE,
  labeling = labeling_values,
  suppress = 0,
  rot_labels = c(0, 0, 0, 90)
)

# Three-way counts: satisfaction (rows) by Inflight.service (columns), with
# one slice per travel Class.
df10 <- table(
  data1[["satisfaction"]],
  data1[["Inflight.service"]],
  data1[["Class"]]
)
df10
## , ,  = Eco Plus
## 
##                          
##                               0     1     2     3     4     5
##   satisfied                   0   242   303   354   503   434
##   neutral or dissatisfied     0   446   647  1609  2010   920
## 
## , ,  = Business
## 
##                          
##                               0     1     2     3     4     5
##   satisfied                   0   809  1916  3070 14882 13713
##   neutral or dissatisfied     3  1590  2692  4103  4575  2180
## 
## , ,  = Eco
## 
##                          
##                               0     1     2     3     4     5
##   satisfied                   0   995  1220  1409  2697  2350
##   neutral or dissatisfied     0  2981  4636  9682 13179  7444
# Three-way table of satisfaction, Inflight.service and Class, with named
# dimensions for modelling.
q <- data1[c("satisfaction", "Inflight.service", "Class")]
dq <- xtabs(
  ~ satisfaction + Inflight.service + Class,
  data = q
)
# Main-effects-only log-linear model: all three variables mutually independent.
loglm(
  ~ satisfaction + Inflight.service + Class,
  data = dq
)
## Call:
## loglm(formula = ~satisfaction + Inflight.service + Class, data = dq)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 40014.16 27        0
## Pearson          41635.17 27        0
# Association measures computed separately for each Class slice.
# NOTE(review): Pearson X^2 (and the coefficients derived from it) come out
# NaN for slices in which the Inflight.service = 0 column has no observations.
assocstats(df10)
## $`:Eco Plus`
##                     X^2 df P(> X^2)
## Likelihood Ratio 180.86  5        0
## Pearson             NaN  5      NaN
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: NaN 
## Cramer's V        : NaN 
## 
## $`:Business`
##                     X^2 df P(> X^2)
## Likelihood Ratio 7938.3  5        0
## Pearson          8111.7  5        0
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.375 
## Cramer's V        : 0.405 
## 
## $`:Eco`
##                    X^2 df P(> X^2)
## Likelihood Ratio 602.7  5        0
## Pearson            NaN  5      NaN
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: NaN 
## Cramer's V        : NaN
# Three-way mosaic with cell counts as labels, compared against the
# mutual-independence model. `suppress` is the correct spelling of the labeling
# argument; the earlier `supress` matched no parameter and was silently
# absorbed by `...`.
mosaic(
  ~ satisfaction + Inflight.service + Class,
  data = q,
  expected = ~ satisfaction + Inflight.service + Class,
  shade = TRUE,
  labeling = labeling_values,
  suppress = 0,
  rot_labels = c(0, 0, 0, 90)
)

# Association between satisfaction and travel Class.
# Raw two-way counts (rows: satisfaction, columns: Class).
df1 <- table(data1$satisfaction, data1$Class)
df1
##                          
##                           Eco Plus Business   Eco
##   satisfied                   1836    34390  8671
##   neutral or dissatisfied     5632    15143 37922
# Same cross-tabulation built via xtabs() so the dimensions carry variable
# names for the model formula.
a <- data1[, c("satisfaction", "Class")]
da <- xtabs(~ satisfaction + Class, data = a)
# Main-effects-only (independence) log-linear model.
loglm(~ satisfaction + Class, data = da)
## Call:
## loglm(formula = ~satisfaction + Class, data = da)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 27672.09  2        0
## Pearson          26402.22  2        0
assocstats(df1)
##                    X^2 df P(> X^2)
## Likelihood Ratio 27672  2        0
## Pearson          26402  2        0
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.451 
## Cramer's V        : 0.505
# Mosaic plot compared against the independence model fitted above.
# The expected model was previously `~satisfaction * Class`, i.e. the
# saturated model, which reproduces the observed counts exactly: every
# residual was zero and the plot triggered the "All residuals are zero"
# warning, leaving the shading uninformative.
# `suppress` is also spelled correctly now (was `supress`, silently ignored).
mosaic(
  ~ satisfaction + Class,
  data = da,
  expected = ~ satisfaction + Class,
  shade = TRUE,
  labeling = labeling_values,
  suppress = 0,
  rot_labels = c(0, 0, 0, 90)
)