d0=read.csv("Baggage.csv",header = T)
attach(d0)
AmericanEagle=d0[Airline=="American Eagle",]
Hawaiian=d0[Airline=="Hawaiian",]
United=d0[Airline=="United",]
library(forecast)
AmericanEagle_Baggage_ts= ts(AmericanEagle$Baggage, frequency=12, start=c(2004,1), end=c(2010,12))
plot.ts(AmericanEagle_Baggage_ts,main="American Eagle",xlab="Date",ylab="Baggage Complaints",type="b")+grid()
Hawaiian_Baggage_ts= ts(Hawaiian$Baggage, frequency=12, start=c(2004,1), end=c(2010,12))
plot.ts(Hawaiian_Baggage_ts,main="Hawaiian",xlab="Date",ylab="Baggage Complaints",type="b")+grid()
United_Baggage_ts= ts(United$Baggage, frequency=12, start=c(2004,1), end=c(2010,12))
plot.ts(United_Baggage_ts,main="United",xlab="Date",ylab="Baggage Complaints",type="b")+grid()
#AmericanEagle
A2004=AmericanEagle[which(AmericanEagle$Year==2004),]
A2005=AmericanEagle[which(AmericanEagle$Year==2005),]
A2006=AmericanEagle[which(AmericanEagle$Year==2006),]
A2007=AmericanEagle[which(AmericanEagle$Year==2007),]
A2008=AmericanEagle[which(AmericanEagle$Year==2008),]
A2009=AmericanEagle[which(AmericanEagle$Year==2009),]
A2010=AmericanEagle[which(AmericanEagle$Year==2010),]
plot(A2004$Baggage~A2004$Month,type="b",xlab="Month",ylab="Baggage Complaints",
main="American Eagle",ylim=c(5000,37000),xlim=c(0,14),color="black",pch=1)
lines(A2005$Baggage~A2005$Month,col="red",type="b",pch=9)
lines(A2006$Baggage~A2006$Month,col="orange",type="b",pch=10)
lines(A2007$Baggage~A2007$Month,col="yellow3",type="b",pch=15)
lines(A2008$Baggage~A2008$Month,col="green",type="b",,pch=16)
lines(A2009$Baggage~A2009$Month,col="blue",type="b",pch=17)
lines(A2010$Baggage~A2010$Month,col="palevioletred1",type="b",pch=18)
Name=c("2004","2005","2006","2007","2008","2009","2010")
legend("topright", Name, ncol=3,
col=c("black","red","orange","yellow3","green","blue","palevioletred1"),
bg = "gray90", pch = c(1,9,10,15,16,17,18),lty =1)
#Hawaiian
H2004=Hawaiian[which(Hawaiian$Year==2004),]
H2005=Hawaiian[which(Hawaiian$Year==2005),]
H2006=Hawaiian[which(Hawaiian$Year==2006),]
H2007=Hawaiian[which(Hawaiian$Year==2007),]
H2008=Hawaiian[which(Hawaiian$Year==2008),]
H2009=Hawaiian[which(Hawaiian$Year==2009),]
H2010=Hawaiian[which(Hawaiian$Year==2010),]
plot(H2004$Baggage~H2004$Month,type="b",xlab="Month",ylab="Baggage Complaints",
main="Hawaiian",ylim=c(1000,3500),xlim=c(0,14),color="black",pch=1)
lines(H2005$Baggage~H2005$Month,col="red",type="b",pch=9)
lines(H2006$Baggage~H2006$Month,col="orange",type="b",pch=10)
lines(H2007$Baggage~H2007$Month,col="yellow3",type="b",pch=15)
lines(H2008$Baggage~H2008$Month,col="green",type="b",,pch=16)
lines(H2009$Baggage~H2009$Month,col="blue",type="b",pch=17)
lines(H2010$Baggage~H2010$Month,col="palevioletred1",type="b",pch=18)
legend("topright", Name, ncol=3,
col=c("black","red","orange","yellow3","green","blue","palevioletred1"),
bg = "gray90", pch = c(1,9,10,15,16,17,18),lty =1)
#United
U2004=United[which(United$Year==2004),]
U2005=United[which(United$Year==2005),]
U2006=United[which(United$Year==2006),]
U2007=United[which(United$Year==2007),]
U2008=United[which(United$Year==2008),]
U2009=United[which(United$Year==2009),]
U2010=United[which(United$Year==2010),]
plot(U2004$Baggage~U2004$Month,type="b",xlab="Month",ylab="Baggage Complaints",main="United",
ylim=c(8000,52000),xlim=c(0,14),color="black",pch=1)
lines(U2005$Baggage~U2005$Month,col="red",type="b",pch=9)
lines(U2006$Baggage~U2006$Month,col="orange",type="b",pch=10)
lines(U2007$Baggage~U2007$Month,col="yellow3",type="b",pch=15)
lines(U2008$Baggage~U2008$Month,col="green",type="b",,pch=16)
lines(U2009$Baggage~U2009$Month,col="blue",type="b",pch=17)
lines(U2010$Baggage~U2010$Month,col="palevioletred1",type="b",pch=18)
legend("topright", Name, ncol=3,
col=c("black","red","orange","yellow3","green","blue","palevioletred1"),
bg = "gray90", pch = c(1,9,10,15,16,17,18),lty =1)
In general, there are high baggage complaints in June to August and December in a year. For American Eagle, the number of the baggae complaints increased from 2004 to 2006 and then decreased until 2010. For Hawaiian, the number of the baggage complaints increased from 2004 and reached the highest complaint in 2008 and after it decreased. However, in 2010, there were abnormally high complaint in Oct, Noc, and Dec compared to the previous year. *For United, the baggage complaints increased from 2004 to 2006 and 2007 and after 2007 it started decreasing. After 2008, the baggage complaints imrpoved a lot until 2010.
AmericanEagle_Baggage_ts= ts(AmericanEagle$Baggage, frequency=12, start=c(2004,1), end=c(2010,12))
plot.ts(AmericanEagle_Baggage_ts,col="red",pch=5,ylim=c(1000,45000),xlab="Date",ylab="Baggage Complaints",type="b")+grid()
Hawaiian_Baggage_ts= ts(Hawaiian$Baggage, frequency=12, start=c(2004,1), end=c(2010,12))
par(new=T)
plot.ts(Hawaiian_Baggage_ts,col="blue",pch=6,ylim=c(1000,45000),xlab="Date",ylab="Baggage Complaints",type="b")+grid()
United_Baggage_ts= ts(United$Baggage, frequency=12, start=c(2004,1), end=c(2010,12))
par(new=T)
plot.ts(United_Baggage_ts,col="green",pch=7,ylim=c(1000,45000),xlab="Date",ylab="Baggage Complaints",type="b")+grid()
legend("topright", c("United","American Eagle","Hawaiian"), ncol=1,
col=c("green","red","blue"),
bg = "gray90", pch = c(7,5,6),lty =3)
It’s difficult to tell that which airline company has the better baggage handling practice since the numbers of the enplaned flights are hugely different. It’s obvious that if you have more people taking the flights, you will definitely get more baggage complaints. Therefore, the number of baggage complaints can’t mean to which company has the best practice. However, from the graph, after 2007, the complaints in United and American Eagle decreased, so it can show that these two company have some good practices to handle the complaints and make them decrease in the following years. For Hawaiian, it’s difficult to tell the trend of the baggage complaints since the y-scale is to broad for Hawaiian. putting the 3 companies’ baggage complaints into one graph is not the good way to discuss about whether the company improve the practice to handle the complaints.
Based on the number of the complaints the airline received, the United got the highest number of complaints; Hawaiian got the least number of the complaints. It makes sense that they got more complaints since there are more enplanned flights for United.
From the graph, it seems that they are getting better for dealing with the complaints. It’s hard to tell whether Hawaiian is getting better or worse because of the broad y-scale that need to fit the other airplane companies.
Not really. It’s not approriate to make these 3 company into one graph since the numbers of the complaints from differenct companies are hugely different. It may sacrifice the small number of the complaint in Hawaiian Airline making it difficult to tell the trend. It’s more fair to get the ratio of complaints to enplaned paddengers and get the numbers from these 3 companies to make the comparison.
#scheduled flights:
round(colMeans(AmericanEagle[,6,drop=FALSE]),2)
## Scheduled
## 41314.05
round(colMeans(Hawaiian[,6,drop=FALSE]),2)
## Scheduled
## 4844.68
round(colMeans(United[,6,drop=FALSE]),2)
## Scheduled
## 38225.3
#enplaned flights:
round(colMeans(AmericanEagle[,8,drop=FALSE]),2)
## Enplaned
## 1396726
round(colMeans(Hawaiian[,8,drop=FALSE]),2)
## Enplaned
## 594174.2
round(colMeans(United[,8,drop=FALSE]),2)
## Enplaned
## 4620712
| AmericanEagle | Hawaiian | United |
|---|---|---|
| 41,314.05 | 4,844.68 | 38,225.30 |
| 1,396,726 | 594,174 | 4,620,712 |
It provides me with the average of scheduled flights and the average of enplaned passengers by different airlines. For United and American Eagle, United got the smaller number of scheduled flights than American Eagle, but United had more people actually taking the flights than American Eagle. It may because United have better baggage practice so people were willing to take United’s flights rather than American Eagle. For Hawaiian, the number of enplaned paseengers and scheduled flights is far less than the other two companies so it’s hard to tell the performance of the company. It’s better to get the percentage or ratio to make the comparison among these 3 companies.
#Baggage % = Baggage / Enplaned ×100 %. Display average Baggage % for each airline.
paste(round((sum(AmericanEagle[,5])/sum(AmericanEagle[,8]))*100,2),"%")
## [1] "1.05 %"
paste(round((sum(Hawaiian[,5])/sum(Hawaiian[,8]))*100,2),"%")
## [1] "0.27 %"
paste(round((sum(United[,5])/sum(United[,8]))*100,2),"%")
## [1] "0.47 %"
| AmericanEagle | Hawaiian | United |
|---|---|---|
| 1.05% | 0.27% | 0.47% |
No. From the table of enplaned passengers and scheduled flights, I expected United may have better performance than American Eagle since there were more people taking United’s airplanes than American Eagle. Actually, United got the highest percentage of baggage complaints from passengers. The second highest is American Eagle and Hawaiian seems to have the best baggage complaints practice to improve this problem.
AmericanEagle$Baggage_percernt=round(((AmericanEagle[,5])/(AmericanEagle[,8]))*100,2)
AmericanEagle_BaggagePercent_ts= ts(AmericanEagle$Baggage_percernt, frequency=12, start=c(2004,1), end=c(2010,12))
plot.ts(AmericanEagle_BaggagePercent_ts,col="red",pch=5,ylim=c(0,3),xlab="Date",ylab="Baggage Complaints Percentage (%)",type="b")+grid()
par(new=T)
Hawaiian$Baggage_percernt=round(((Hawaiian[,5])/(Hawaiian[,8]))*100,2)
Hawaiian_BaggagePercent_ts= ts(Hawaiian$Baggage_percernt, frequency=12, start=c(2004,1), end=c(2010,12))
plot.ts(Hawaiian_BaggagePercent_ts,col="blue",pch=6,ylim=c(0,3),xlab="Date",ylab="Baggage Complaints Percentage (%)",type="b")+grid()
par(new=T)
United$Baggage_percernt=round(((United[,5])/(United[,8]))*100,2)
United_BaggagePercent_ts= ts(United$Baggage_percernt, frequency=12, start=c(2004,1), end=c(2010,12))
plot.ts(United_BaggagePercent_ts,col="green",pch=7,ylim=c(0,3),xlab="Date",ylab="Baggage Complaints Percentage (%)",type="b")+grid()
legend("topright", c("United","American Eagle","Hawaiian"), ncol=1,
col=c("green","red","blue"),
bg = "gray90", pch = c(7,5,6),lty =3)
I think making the baggage complaint percentages among the 3 companies in one graph is better since it’s easier to make the comparison that which company get the high percentage of the complaits from passengers and have the bad performance on dealing with complaints.
plot.ts(AmericanEagle_BaggagePercent_ts,col="red",pch=5,ylim=c(0,3),xlab="Date",ylab="Baggage Complaints Percentage (%)",type="b",main="American Eagle")+grid()
plot.ts(Hawaiian_BaggagePercent_ts,col="blue",pch=6,ylim=c(0,0.5),xlab="Date",ylab="Baggage Complaints Percentage (%)",type="b",main="Hawaiian")+grid()
plot.ts(United_BaggagePercent_ts,col="green",pch=7,ylim=c(0,1),xlab="Date",ylab="Baggage Complaints Percentage (%)",type="b",main="United")+grid()
Based on KPI baggage complaint percentage, it’s more clear and straightforward to compare baggage complaints among 3 companies since the scale of the companies is differenct, it’s hard to tell which company has the better solutions to deal with it merely based on the number of the baggage complaints. It may lead to a bias by only looking at the number of the complaints.
new_data_AmericanEagle=data.frame(matrix(0, ncol = 2, nrow = 12))
colnames(new_data_AmericanEagle)=c("Month","MonthlyBaggage")
new_data_AmericanEagle$Month= ("Month"=1:nrow(new_data_AmericanEagle))
new_data_Hawaiian=data.frame(matrix(0, ncol = 2, nrow = 12))
colnames(new_data_Hawaiian)=c("Month","MonthlyBaggage")
new_data_Hawaiian$Month= ("Month"=1:nrow(new_data_Hawaiian))
new_data_United=data.frame(matrix(0, ncol = 2, nrow = 12))
colnames(new_data_United)=c("Month","MonthlyBaggage")
new_data_United$Month= ("Month"=1:nrow(new_data_United))
for (i in c(1:12)){
new_data_AmericanEagle$MonthlyBaggage[i]=round(mean((AmericanEagle[AmericanEagle$Month==i,"Baggage"])/
(AmericanEagle[AmericanEagle$Month==i,"Enplaned"])*100),2)
}
for (i in c(1:12)){
new_data_Hawaiian$MonthlyBaggage[i]=round(mean((Hawaiian[Hawaiian$Month==i,"Baggage"])/
(Hawaiian[Hawaiian$Month==i,"Enplaned"])*100),2)
}
for (i in c(1:12)){
new_data_United$MonthlyBaggage[i]=round(mean((United[United$Month==i,"Baggage"])/
(United[United$Month==i,"Enplaned"])*100),2)
}
plot(AmericanEagle$Baggage_percernt~AmericanEagle$Month,pch=20,col="red",ylim=c(0,2.5),ylab="Monthly Average of Baggage Complaints (%)",xlab="Month",main="American Eagle Monthly Average of Baggage Complaints (2004-2010)")
axis(1,at=c(1:12))
lines(new_data_AmericanEagle$MonthlyBaggage~new_data_AmericanEagle$Month,ylim=c(0,2.5),type="b",col="red",pch=5)
plot(Hawaiian$Baggage_percernt~Hawaiian$Month,pch=20,col="blue",ylim=c(0,0.5),ylab="Monthly Average of Baggage Complaints (%)",xlab="Month",main="Hawaiian Monthly Average of Baggage Complaints (2004-2010)")
axis(1,at=c(1:12))
lines(new_data_Hawaiian$MonthlyBaggage~new_data_Hawaiian$Month,type="b",col="blue",pch=6)
plot(United$Baggage_percernt~United$Month,pch=20,col="green",ylim=c(0,1),ylab="Monthly Average of Baggage Complaints (%)",xlab="Month",main="United Monthly Average of Baggage Complaints (2004-2010)")
axis(1,at=c(1:12))
lines(new_data_United$MonthlyBaggage~new_data_United$Month,type="b",col="green",pch=7)
plot(new_data_AmericanEagle$MonthlyBaggage~new_data_AmericanEagle$Month,ylim=c(0,2),ylab="Monthly Average of Baggage Complaints (%)",xlab="Month",main="Monthly Average of Baggage Complaints (2004-2010)",type="b",col="red",pch=5)
axis(1,at=c(1:12))
lines(new_data_Hawaiian$MonthlyBaggage~new_data_Hawaiian$Month,type="b",col="blue",pch=6)
lines(new_data_United$MonthlyBaggage~new_data_United$Month,type="b",col="green",pch=7)
legend("topright", c("United","American Eagle","Hawaiian"), ncol=1,
col=c("green","red","blue"),
bg = "gray90", pch = c(7,5,6),lty =3)
Generally speaking, these 3 companies have the higher complaint during June to August and December to January. United has the more obvious seasonal fluctuation compared to American eagle and Hawaiian.
A=round(mean(new_data_AmericanEagle$MonthlyBaggage),2)
H=round(mean(new_data_Hawaiian$MonthlyBaggage),2)
U=round(mean(new_data_United$MonthlyBaggage),2)
new_data_AmericanEagle$Average=A
new_data_Hawaiian$Average=H
new_data_United$Average=U
M_A=lm(new_data_AmericanEagle$MonthlyBaggage~Month)
M_H=lm(new_data_Hawaiian$MonthlyBaggage~Month)
M_U=lm(new_data_United$MonthlyBaggage~Month)
plot(AmericanEagle$Baggage_percernt~AmericanEagle$Month,pch=20,col="red",ylim=c(0,2.5),ylab="Monthly Average of Baggage Complaints (%)",xlab="Month",main="American Eagle Monthly Average of Baggage Complaints (2004-2010)")
axis(1,at=c(1:12))
lines(new_data_AmericanEagle$MonthlyBaggage~new_data_AmericanEagle$Month,ylim=c(0,2.5),type="b",col="red",pch=5)
lines(new_data_AmericanEagle$Average,lty=2,col="red",lwd=2)
abline(M_A,lty=3,col="blue",lwd=2)
legend("topright", c("Average line","Regression line"), ncol=1,
col=c("red","blue"),bg = "gray90", lty = c(2,3))
plot(Hawaiian$Baggage_percernt~Hawaiian$Month,pch=20,col="blue",ylim=c(0,0.5),ylab="Monthly Average of Baggage Complaints (%)",xlab="Month",main="Hawaiian Monthly Average of Baggage Complaints (2004-2010)")
axis(1,at=c(1:12))
lines(new_data_Hawaiian$MonthlyBaggage~new_data_Hawaiian$Month,type="b",col="blue",pch=6)
lines(new_data_Hawaiian$Average,lty=2,col="red",lwd=2)
abline(M_H,lty=3,col="blue",lwd=2)
legend("topright", c("Average line","Regression line"), ncol=1,
col=c("red","blue"),bg = "gray90", lty = c(2,3))
plot(United$Baggage_percernt~United$Month,pch=20,col="green",ylim=c(0,1),ylab="Monthly Average of Baggage Complaints (%)",xlab="Month",main="United Monthly Average of Baggage Complaints (2004-2010)")
axis(1,at=c(1:12))
lines(new_data_United$MonthlyBaggage~new_data_United$Month,type="b",col="green",pch=7)
lines(new_data_United$Average,lty=2,col="red",lwd=2)
abline(M_U,lty=3,col="blue",lwd=2)
legend("topright", c("Average line","Regression line"), ncol=1,
col=c("red","blue"),bg = "gray90", lty = c(2,3))
From the graphs of the monthly average baggage (%), it can tell that there are not big difference between average line and regression line. They have the similar seasonal trend. The baggage complaints rely on the seasonal factor with some level.
ceo=read.csv("CEOcompensation.csv",header = T)
length(which(ceo$Gender=="F"))
## [1] 2
ceo[which.min(ceo$Age),"Age"]
## [1] 45
ceo[which.max(ceo$Age),"Age"]
## [1] 81
round(colMeans(ceo[,"Age",drop=F]),2)
## Age
## 58.38
round(sum(ceo[,"X2008.Salary",drop=F]),2)
## [1] 201.8
length(which(ceo$Years.with.company==ceo$Years.as.company.CEO))
## [1] 40
ceo$year_before_ceo=ceo[,"Years.with.company"]-ceo[,"Years.as.company.CEO"]
round(colMeans(ceo[,"year_before_ceo",drop=F]),2)
## year_before_ceo
## 11.51
which.max(table(ceo$Industry))
## Oil & Gas Operations
## 19
ceo$compensation=ceo[,"X2008.Salary"]+ceo[,"X2008.Bonus"]+ceo[,"X2008.Other"]+ceo[,"X2008.Stock.gains"]
round(mean(ceo$compensation),2)
## [1] 18.68
ceo[which.max(ceo$compensation),"CEO"]
## [1] Lawrence J Ellison
## 179 Levels: A Greig Woodring Alan G Lafley ... William R Johnson
ceo[which.max(ceo$compensation),"compensation"]
## [1] 556.98
sort(ceo$compensation, decreasing = TRUE)
## [1] 556.98 222.64 154.58 116.93 90.47 87.48 68.62 61.31 51.28 49.26
## [11] 47.55 44.49 42.27 39.27 39.22 36.71 36.01 35.13 34.59 33.05
## [21] 30.33 30.04 29.75 28.98 27.14 26.94 26.76 26.24 25.51 25.33
## [31] 24.29 24.24 23.22 23.11 22.95 20.96 20.73 19.95 19.75 19.31
## [41] 19.16 18.90 18.68 18.59 18.52 18.12 18.00 17.83 17.55 16.01
## [51] 15.99 15.89 15.69 14.04 13.94 13.74 13.67 13.20 13.17 12.98
## [61] 12.60 12.55 12.52 12.05 11.77 11.55 11.48 11.43 11.31 11.30
## [71] 11.09 11.07 11.05 10.50 10.47 10.44 10.30 10.26 10.25 10.02
## [81] 9.66 9.52 9.29 9.29 9.24 9.09 8.99 8.72 8.61 8.23
## [91] 8.18 7.85 7.76 7.74 7.65 7.54 7.22 7.02 7.02 6.77
## [101] 6.76 6.74 6.65 6.61 6.32 6.27 6.12 5.98 5.87 5.85
## [111] 5.83 5.78 5.73 5.69 5.68 5.57 5.30 5.24 5.11 5.08
## [121] 5.03 5.01 4.88 4.68 4.66 4.53 4.49 4.47 4.40 4.30
## [131] 4.17 4.16 4.12 4.11 4.11 4.01 3.90 3.90 3.82 3.70
## [141] 3.66 3.62 3.47 3.47 3.39 3.34 3.31 3.08 3.07 2.97
## [151] 2.86 2.83 2.78 2.64 2.60 2.39 2.31 2.30 2.16 2.16
## [161] 2.05 1.99 1.92 1.71 1.54 1.48 1.41 1.40 1.35 1.35
## [171] 1.33 1.28 1.18 1.07 1.03 0.97 0.77 0.10 0.00
ceo[ceo$compensation==222.64,"Industry"]
## [1] Oil & Gas Operations
## 26 Levels: Aerospace & Defense Banking ... Utilities
ceo[,"AgeCategory"]=NA
sum_1=0
sum_2=0
sum_3=0
sum_4=0
sum_5=0
count_1=0
count_2=0
count_3=0
count_4=0
count_5=0
for (i in 1:length(ceo$Nr)){
if (ceo$Age[i]>=45&ceo$Age[i]<50){
sum_1=sum_1+ceo$X2008.Salary[i]
count_1=count_1+1
average_1=sum_1/count_1
}
if (ceo$Age[i]>=50&ceo$Age[i]<55){
sum_2=sum_2+ceo$X2008.Salary[i]
count_2=count_2+1
average_2=sum_2/count_2
}
if (ceo$Age[i]>=55&ceo$Age[i]<60){
sum_3=sum_3+ceo$X2008.Salary[i]
count_3=count_3+1
average_3=sum_3/count_3
}
if (ceo$Age[i]>=60&ceo$Age[i]<70){
sum_4=sum_4+ceo$X2008.Salary[i]
count_4=count_4+1
average_4=sum_4/count_4
}
if (ceo$Age[i]>=70){
sum_5=sum_5+ceo$X2008.Salary[i]
count_5=count_5+1
average_5=sum_5/count_5
}
}
round(average_1,2)
## [1] 0.97
round(average_2,2)
## [1] 1.11
round(average_3,2)
## [1] 1.07
round(average_4,2)
## [1] 1.13
round(average_5,2)
## [1] 1.79
| 45–50 | 50-55 | 55-60 | 60-70 | 70 or more |
|---|---|---|---|---|
| 0.97 million | 1.11 million | 1.07 million | 1.13 million | 1.79 million |
IndustryMedian=read.csv("IndustryMedians.csv",header = T)
for (i in 1:length(ceo$Nr)){
for (j in 1:length(IndustryMedian$Industry)){
if (ceo$Industry[i]==IndustryMedian$Industry[j]){
ceo$differece[i]=((ceo$compensation[i]-IndustryMedian$Total.compensation[j])/IndustryMedian$Total.compensation[j])*100
}
}
}
length(which(ceo$differece>=100))
## [1] 63
No. If there are the value mostly toward to highest value or lowest value, it may lead to get the different median from the way that you get each person’s compensation first and calculate the median of it.