Source: Psi Chi R Contest

#Load packages and import data

#install.packages(c("ggplot2", "dplyr", "lubridate", "readr"))

library(ggplot2)
library(dplyr)
library(readr)

# Apply ggplot2's black-and-white theme to every plot drawn afterwards.
theme_set(theme_bw())

# Load the contest CSV into a data frame named `december`.
december <- read.csv(
  file = "C:/Users/alanh/Documents/R/Flight Data December 2023 Contest.csv"
)

Level 1-

Provide descriptive statistics for the actual_flown_miles variable. Indicate the minimum number of miles flown, the maximum number of miles flown, the average number of miles flown, and the median number of miles flown.

# Descriptive statistics for actual_flown_miles.
# Count missing values first; the printed 0 shows na.rm has nothing to drop.
sum(is.na(december$actual_flown_miles))
## [1] 0
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
min(december$actual_flown_miles, na.rm = TRUE) # Minimum = 45
## [1] 45
max(december$actual_flown_miles, na.rm = TRUE) # Maximum = 8440
## [1] 8440
mean(december$actual_flown_miles, na.rm = TRUE) # Average = 2477.738
## [1] 2477.738
median(december$actual_flown_miles, na.rm = TRUE) # Median = 1846
## [1] 1846

Level 2-

Describe the relationship between satisfaction (satisfaction_score) and delay time (arrival_delay_minutes). Note that for satisfaction_score a value of 1 is the lowest (dissatisfied) and 5 is the highest (satisfied). Note that negative values in the arrival delay minutes mean the flight was early, while positive values indicate the flight was late.

# Inspect both variables before correlating them.
summary(december$satisfaction_score) # stored as character; converted below
##    Length     Class      Mode 
##     47074 character character
summary(december$arrival_delay_minutes)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -104.000  -19.000   -9.000   -1.702    4.000  907.000

# Coerce both columns to numeric; score entries that are not numbers become NA.
december$satisfaction_score <- as.numeric(december$satisfaction_score)
december$arrival_delay_minutes <- as.numeric(december$arrival_delay_minutes)

# Keep rows with a score (december1), then rows that also have a delay
# (december2). For a single column, !is.na() selects the same rows as
# complete.cases().
december1 <- filter(december, !is.na(satisfaction_score))
december2 <- filter(december1, !is.na(arrival_delay_minutes))

# Pearson correlation between score and delay. The printed result below
# (r = -0.079, 95% CI excluding 0) is a very weak negative association:
# longer arrival delays go with slightly lower satisfaction.
cor.test(december2$satisfaction_score,december2$arrival_delay_minutes) #p-value < 2.2e-16, cor -0.07935616
## 
##  Pearson's product-moment correlation
## 
## data:  december2$satisfaction_score and december2$arrival_delay_minutes
## t = -14.885, df = 34961, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08976354 -0.06893144
## sample estimates:
##         cor 
## -0.07935616

Level 3-

Does satisfaction level (satisfaction_score) vary by the cabin (cabin_code) that they’re in?

summary(december2$satisfaction_score)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   3.000   2.876   4.000   5.000

# Summarise the score within each cabin before plotting. Passing the raw rows
# to geom_col() stacks every response, so each bar's height is the SUM of the
# scores and scales with the number of respondents in the cabin rather than
# with how satisfied they were.
# december2 has no missing satisfaction_score values (filtered when it was
# built), so mean() and sd() need no na.rm here.
cabin_satisfaction <- december2 %>%
  group_by(cabin_code) %>%
  summarise(
    mean_score = mean(satisfaction_score),
    sd_score = sd(satisfaction_score),
    n = n()
  )
cabin_satisfaction

# One bar per cabin whose height is that cabin's mean score. The fill by
# satisfaction_type is dropped: a single mean per cabin has no per-type split.
cabin_satisfaction %>%
  ggplot(aes(x = cabin_code, y = mean_score)) +
  geom_col() +
  labs(
    title = "Satisfaction Score by Cabin Type",
    y = "Mean Satisfaction Score",
    x = "Cabin Type"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

# NOTE(review): the earlier answer ("Yes, satisfaction varies by cabin type")
# was read off summed bars; re-confirm it against the per-cabin means above.

Level 4-

Describe the relationship between loyalty program membership (loyalty_program_level) and satisfaction (satisfaction_score). Hint: you’ll have to do some recoding for folks who don’t belong to a program

# Drop rows whose loyalty level is NA; blank strings pass through this filter.
december3 <- december2 %>%
  filter(!is.na(loyalty_program_level))

# Recode blank loyalty levels as "Non-Membership" so that passengers without
# a program appear as their own category instead of an unlabeled one.
is_blank <- december3$loyalty_program_level == ""
december3$loyalty_program_level[is_blank] <- "Non-Membership"

# Average the score within each loyalty level. Plotting the raw rows with
# geom_col() would stack every response, making bar length the SUM of scores,
# which scales with how many passengers hold each level.
loyalty_satisfaction <- december3 %>%
  group_by(loyalty_program_level) %>%
  summarise(
    mean_score = mean(satisfaction_score),
    n = n()
  )
loyalty_satisfaction

# Horizontal bars: one per loyalty level, length equal to the mean score.
loyalty_satisfaction %>%
  ggplot(aes(y = loyalty_program_level, x = mean_score)) +
  geom_col() +
  labs(
    title = "Loyalty and Satisfaction",
    y = "Program Membership",
    x = "Mean Satisfaction Score"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

LS0tDQp0aXRsZTogJ1BzaSBDaGkgUiBDb250ZXN0IC0gRGVjIDIwMjMnDQphdXRob3I6ICdCeSBBbGFuIExhbScNCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDoNCiAgICBhbHdheXNfYWxsb3dfaHRtbDogeWVzDQogICAgZGZfcHJpbnQ6IHBhZ2VkDQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZmxvYXQ6IHllcw0KICAgIG51bWJlcl9zZWN0aW9uczogbm8NCiAgICBhbmNob3Jfc2VjdGlvbnM6IFRSVUUNCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUNCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQotLS0NCg0KWyoqU291cmNlKio6IFBzaSBDaGkgUiBDb250ZXN0XShodHRwczovL29zZi5pby9yODZwMi8pDQoNCg0KYGBge3Igc2V0dXAsaW5jbHVkZT1GfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KG1lc3NhZ2U9Rix3YXJuaW5nPUYsZWNobyA9IFQpDQpgYGANCg0KYGBge3J9DQojTG9hZCBwYWNrYWdlcyBhbmQgaW1wb3J0IGRhdGENCg0KI2luc3RhbGwucGFja2FnZXMoZ2dwbG90MixkcGx5cixsdWJyaWRhdGUscmVhZHIpDQoNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KHJlYWRyKQ0KDQp0aGVtZV9zZXQodGhlbWVfYncoKSkNCg0KZGVjZW1iZXI9cmVhZC5jc3YoJ0M6L1VzZXJzL2FsYW5oL0RvY3VtZW50cy9SL0ZsaWdodCBEYXRhIERlY2VtYmVyIDIwMjMgQ29udGVzdC5jc3YnKQ0KYGBgDQoNCg0KIyMgTGV2ZWwgMS0NClByb3ZpZGUgZGVzY3JpcHRpdmUgc3RhdGlzdGljcyBmb3IgdGhlIGFjdHVhbF9mbG93bl9taWxlcyB2YXJpYWJsZS4gSW5kaWNhdGUgdGhlICoqbWluaW11bSoqIG51bWJlciBvZiBtaWxlcyBmbG93LCB0aGUgKiptYXhpbXVtKiogbnVtYmVyIG9mIG1pbGVzIGZsb3csIHRoZSAqKmF2ZXJhZ2UqKiBudW1iZXIgb2YgbWlsZXMgZmxvd24sIGFuZCB0aGUgKiptZWRpYW4qKiBudW1iZXIgb2YgbWlsZXMgZmxvd24uDQoNCmBgYHtyfQ0Kc3VtKGlzLm5hKGRlY2VtYmVyJGFjdHVhbF9mbG93bl9taWxlcykpDQoNCm1pbihkZWNlbWJlciRhY3R1YWxfZmxvd25fbWlsZXMsbmEucm0gPSBUKSAjTWluaW11bSA9IDQ1DQoNCm1heChkZWNlbWJlciRhY3R1YWxfZmxvd25fbWlsZXMsbmEucm09VCkgI01heGltdW0gPSA4NDQwDQoNCm1lYW4oZGVjZW1iZXIkYWN0dWFsX2Zsb3duX21pbGVzLG5hLnJtPVQpICNBdmVyYWdlID0gMjQ3Ny43MzgNCg0KbWVkaWFuKGRlY2VtYmVyJGFjdHVhbF9mbG93bl9taWxlcyxuYS5ybT1UKSAjTWVkaWFuID0gMTg0Ng0KYGBgDQoNCiMjIExldmVsIDItDQpEZXNjcmliZSB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gc2F0aXNmYWN0aW9uIChzYXRpc2ZhY3Rpb25fc2NvcmUpIGFuZCBkZWxheSB0aW1lIChhcnJpdmFsX2RlbGF5X21pbnV0ZXMpLiBOb3RlIHRoYXQgZm9yIHNhdGlzZmFjdGlvbl9zY29yZSBhIHZhbHVlIG9mIDEgaXMgdGhlIGxvd2VzdCAoZGlzc2F0aXNmaWVkKSBhbmQgNSBpcyB0aGUgaGlnaGVzdCAoc2F0aXNmaWVkKS4gTm90ZSB0aGF0IG5lZ2F0aXZlIHZh
bHVlcyBpbiB0aGUgYXJyaXZhbCBkZWxheSBtaW51dGVzIG1lYW4gdGhlIGZsaWdodCB3YXMgZWFybHksIHdoaWxlIHBvc2l0aXZlIHZhbHVlcyBpbmRpY2F0ZSB0aGUgZmxpZ2h0IHdhcyBsYXRlLiANCg0KYGBge3J9DQpzdW1tYXJ5KGRlY2VtYmVyJHNhdGlzZmFjdGlvbl9zY29yZSkgI3RyYW5zZm9ybSBmcm9tIGNoYXIgaW50byBudW1lcmljDQpzdW1tYXJ5KGRlY2VtYmVyJGFycml2YWxfZGVsYXlfbWludXRlcykNCg0KZGVjZW1iZXIkc2F0aXNmYWN0aW9uX3Njb3JlPWFzLm51bWVyaWMoZGVjZW1iZXIkc2F0aXNmYWN0aW9uX3Njb3JlKQ0KDQpkZWNlbWJlciRhcnJpdmFsX2RlbGF5X21pbnV0ZXM9YXMubnVtZXJpYyhkZWNlbWJlciRhcnJpdmFsX2RlbGF5X21pbnV0ZXMpDQoNCmRlY2VtYmVyMT1kZWNlbWJlciAlPiUgDQogIGZpbHRlcihjb21wbGV0ZS5jYXNlcyhzYXRpc2ZhY3Rpb25fc2NvcmUpKQ0KDQpkZWNlbWJlcjI9ZGVjZW1iZXIxICU+JSANCiAgZmlsdGVyKGNvbXBsZXRlLmNhc2VzKGFycml2YWxfZGVsYXlfbWludXRlcykpDQoNCmNvci50ZXN0KGRlY2VtYmVyMiRzYXRpc2ZhY3Rpb25fc2NvcmUsZGVjZW1iZXIyJGFycml2YWxfZGVsYXlfbWludXRlcykgI3AtdmFsdWUgPCAyLjJlLTE2LCBjb3IgLTAuMDc5MzU2MTYNCmBgYA0KDQojIyBMZXZlbCAzLQ0KRG9lcyBzYXRpc2ZhY3Rpb24gbGV2ZWwgKHNhdGlzZmFjdGlvbl9zY29yZSkgdmFyeSBieSB0aGUgY2FiaW4gKGNhYmluX2NvZGUpIHRoYXQgdGhleeKAmXJlIGluPyANCg0KYGBge3J9DQpzdW1tYXJ5KGRlY2VtYmVyMiRzYXRpc2ZhY3Rpb25fc2NvcmUpDQoNCmRlY2VtYmVyMiAlPiUgDQogIGdncGxvdChhZXMoeD1jYWJpbl9jb2RlLHk9c2F0aXNmYWN0aW9uX3Njb3JlLGZpbGw9c2F0aXNmYWN0aW9uX3R5cGUpKSsNCiAgZ2VvbV9jb2woKSsNCiAgbGFicyh0aXRsZT0nU2F0aXNmYWN0aW9uIFNjb3JlIGJ5IENhYmluIFR5cGUnLHk9J1NhdGlzZmFjdGlvbiBTY29yZScseD0nQ2FiaW4gVHlwZScsZmlsbD0nU2F0aXNmYWN0aW9uIFR5cGUnKSsNCiAgdGhlbWUocGxvdC50aXRsZSA9IGVsZW1lbnRfdGV4dChoanVzdD0uNSkpDQoNCiNZZXMuIFBlciB0aGUgY29sdW1uIGNoYXJ0LCB0aGVyZSBhcmUgdmFyeWluZyBsZXZlbHMgb2Ygc2F0aXNmYWN0aW9uIGJ5IGNhYmluIHR5cGUuDQpgYGANCg0KDQojIyBMZXZlbCA0LQ0KDQpEZXNjcmliZSB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gbG95YWx0eSBwcm9ncmFtIG1lbWJlcnNoaXAgKGxveWFsdHlfcHJvZ3JhbV9sZXZlbCkgYW5kIHNhdGlzZmFjdGlvbiAoc2F0aXNmYWN0aW9uX3Njb3JlKS4gSGludDogeW914oCZbGwgaGF2ZSB0byBkbyBzb21lIHJlY29kaW5nIGZvciBmb2xrcyB3aG8gZG9u4oCZdCBiZWxvbmcgdG8gYSBwcm9ncmFtDQoNCmBgYHtyfQ0KZGVjZW1iZXIzPWRlY2VtYmVyMiAlPiUgDQogIGZpbHRlcihjb21wbGV0ZS5jYXNlcyhsb3lhbHR5X3Byb2dyYW1fbGV2ZWwpKQ0KDQojUmVuYW1lIGJsYW5rcyBh
cyBOQQ0KZGVjZW1iZXIzJGxveWFsdHlfcHJvZ3JhbV9sZXZlbFtkZWNlbWJlcjMkbG95YWx0eV9wcm9ncmFtX2xldmVsPT0iIl09Ik5vbi1NZW1iZXJzaGlwIg0KDQpkZWNlbWJlcjMgJT4lIA0KICBnZ3Bsb3QoYWVzKHk9bG95YWx0eV9wcm9ncmFtX2xldmVsLHg9c2F0aXNmYWN0aW9uX3Njb3JlLGZpbGw9c2F0aXNmYWN0aW9uX3R5cGUpKSsNCiAgZ2VvbV9jb2woKSsNCiAgbGFicyh0aXRsZT0nTG95YWx0eSBhbmQgU2F0aXNmYWN0aW9uJyx5PSdQcm9ncmFtIE1lbWJlcnNoaXAnLHg9J1NhdGlzZmFjdGlvbiBTY29yZScsZmlsbD0nU2F0aXNmYWN0aW9uIFR5cGUnKSsNCiAgdGhlbWUocGxvdC50aXRsZSA9IGVsZW1lbnRfdGV4dChoanVzdD0uNSkpDQoNCmBgYA0KDQoNCg==