# Inner ring of the donut chart: one arc per origin airport.
# `counts` is the number of flights per origin, `percent` its share of all
# flights, and `start`/`end` are the arc boundaries in radians (0 to 2 * pi).
# The total is taken from the data (sum(counts)) rather than a hard-coded
# row count, so the arcs still close the circle if hflights_df is filtered.
level1 <- hflights_df %>%
  group_by(Origin) %>%
  summarise(counts = n()) %>%
  mutate(
    percent = counts / sum(counts),
    end = cumsum(counts) / sum(counts) * 2 * pi,
    # Each arc starts where the previous one ended; the first starts at 0.
    start = dplyr::lag(end, n = 1, default = 0)
  )
# Outer ring of the donut chart: one arc per (origin, carrier) pair.
# Rows are ordered by origin, then by descending flight count, so that the
# carriers of each origin form one contiguous run of arcs.
# ungroup() is required here: summarise() only drops the innermost grouping
# level, and a result still grouped by Origin would make the cumsum()/sum()
# below restart for every origin, stacking all origins on the full circle.
level2 <- hflights_df %>%
  select(Origin, UniqueCarrier) %>%
  group_by(Origin, UniqueCarrier) %>%
  summarise(counts = n()) %>%
  ungroup() %>%
  arrange(Origin, desc(counts))

# Average departure delay per carrier (missing delays ignored).
# NOTE(review): the intermediate names a, b and c are kept in case later code
# refers to them; `c` shadows base::c as a variable only, calls to c() still
# resolve to the function.
a <- group_by(hflights_df, UniqueCarrier)
b <- summarise(a, count = n(), AveDepDelay = mean(DepDelay, na.rm = TRUE))
c <- select(b, UniqueCarrier, AveDepDelay)

# Attach the carrier-level delay and compute arc boundaries in radians over
# the whole table, so the outer arcs line up with the per-origin inner ring.
level2 <- level2 %>%
  left_join(c, by = "UniqueCarrier") %>%
  mutate(
    end = cumsum(counts) / sum(counts) * 2 * pi,
    start = dplyr::lag(end, n = 1, default = 0)
  )
# Two-level donut chart.
# Outer ring (level2): one arc per origin/carrier pair, starting at radius 0.6
# and extending outwards in proportion to the carrier's average departure
# delay. Inner ring (level1): one arc per origin between radius 0.3 and 0.6.
# Columns are referenced by bare name inside aes() (not level2$...), so the
# mapping always follows the `data` argument of the layer.
dd <- ggplot() +
  theme_no_axes() +
  geom_arc_bar(
    aes(
      x0 = 0, y0 = 0,
      r0 = 0.6, r = 0.6 + AveDepDelay * 0.02,
      start = start, end = end,
      fill = UniqueCarrier
    ),
    data = level2
  ) +
  geom_arc_bar(
    aes(
      x0 = 0, y0 = 0,
      r0 = 0.3, r = 0.6,
      start = start, end = end,
      fill = Origin
    ),
    data = level1
  ) +
  # Fixed-position labels for the two inner-ring arcs.
  annotate("text", x = 0.3, y = 0.3, label = "HOU", colour = "Black", size = 4) +
  annotate("text", x = -0.3, y = -0.3, label = "IAH", colour = "Black", size = 4)
dd
# Derive three categorical features and append them to hflights_df
# (in this order: depGr, season, weekend):
#   depGr   - departure time band: "night" (< 800), "day" (800 to 1559),
#             "evening" (>= 1600); NA when DepTime is missing.
#   season  - "Spring" (months 3-5), "Summer" (6-8), "Fall" (9-11),
#             otherwise "Winter".
#   weekend - "weekend" for DayOfWeek 6 or 7 (presumably Saturday/Sunday),
#             otherwise "weekday".
# The weekend test uses %in%: comparing with `== c('6','7')` recycles the
# right-hand side along the rows and matches only about half of the weekend
# flights. Note that %in% never returns NA, so a missing DayOfWeek would be
# labelled "weekday".
hflights_df <- hflights_df %>%
  as.data.frame() %>%
  mutate(
    depGr = ifelse(
      DepTime < 800, "night",
      ifelse(DepTime < 1600, "day", "evening")
    ),
    season = ifelse(
      Month >= 3 & Month <= 5, "Spring",
      ifelse(
        Month >= 6 & Month <= 8, "Summer",
        ifelse(Month >= 9 & Month <= 11, "Fall", "Winter")
      )
    ),
    weekend = ifelse(DayOfWeek %in% c(6, 7), "weekend", "weekday")
  )

# Working copy holding only the derived features and the raw departure delay.
vari <- select(hflights_df, season, weekend, depGr, DepDelay)
# Attach airport metadata to every flight, matched on the destination code.
# NOTE(review): assumes the first column of Airports.csv holds the airport
# code that corresponds to hflights_df$Dest - confirm against the file.
Airports <- read.csv("Airports.csv")
colnames(Airports)[1] <- "Dest"

# Make both join keys plain character vectors up front; joining a factor
# against a character column only works through an implicit coercion that
# dplyr reports with a warning.
Airports$Dest <- as.character(Airports$Dest)
hflights_df$Dest <- as.character(hflights_df$Dest)

hflights_df <- left_join(hflights_df, Airports, by = "Dest")

# Convenience copy of the joined `state` column.
state <- hflights_df$state
# Number of flights per destination, as a data frame with columns Dest, Freq.
CountsOfDest <- as.data.frame(table(hflights_df$Dest))
colnames(CountsOfDest)[1] <- "Dest"
CountsOfDest$Dest <- as.character(CountsOfDest$Dest)

# Departure delay and destination of every flight. The columns are selected
# by name rather than by position (13, 15) so the extract does not depend on
# the column order of hflights_df.
cc <- as.data.frame(hflights_df[c("DepDelay", "Dest")])
cc$Dest <- as.character(cc$Dest)

# One row per flight, carrying the flight count of its destination.
mini2 <- left_join(CountsOfDest, cc, by = "Dest")

# Mean departure delay per destination. na.rm = TRUE belongs to mean(): when
# passed to summarise() it merely creates a constant column named `na.rm`,
# and every destination with a single missing delay gets an NA mean.
mini3 <- mini2 %>%
  group_by(Dest) %>%
  summarise(mean = mean(DepDelay, na.rm = TRUE))

# Keep destinations for which a mean could be computed (drops NA and NaN,
# i.e. destinations without any recorded departure delay).
mini4 <- as.data.frame(mini3[!is.na(mini3$mean), ])
mini4
# Rebuild the feature table used for the delayed / not-delayed comparison.
# Resulting columns, in order:
#   season   - "Spring" (months 3-5), "Summer" (6-8), "Fall" (9-11),
#              otherwise "Winter".
#   weekend  - "weekend" for DayOfWeek 6 or 7 (presumably Saturday/Sunday),
#              otherwise "weekday". %in% is used because `== c('6','7')`
#              recycles the right-hand side along the rows and matches only
#              about half of the weekend flights. %in% never returns NA, so
#              a missing DayOfWeek would be labelled "weekday".
#   depGr    - "night" (DepTime < 800), "day" (800 to 1559), "evening"
#              (>= 1600); NA when DepTime is missing.
#   DepDelay - 1 when the flight left late (DepDelay > 0), 0 otherwise,
#              NA when the delay is missing.
vari <- hflights_df %>%
  select(Month, DayOfWeek, DepTime, DepDelay) %>%
  as.data.frame() %>%
  transmute(
    season = ifelse(
      Month >= 3 & Month <= 5, "Spring",
      ifelse(
        Month >= 6 & Month <= 8, "Summer",
        ifelse(Month >= 9 & Month <= 11, "Fall", "Winter")
      )
    ),
    weekend = ifelse(DayOfWeek %in% c(6, 7), "weekend", "weekday"),
    depGr = ifelse(
      DepTime < 800, "night",
      ifelse(DepTime < 1600, "day", "evening")
    ),
    DepDelay = ifelse(DepDelay > 0, 1, 0)
  )

# Split into delayed and on-time flights; rows with a missing delay flag are
# dropped by filter() and appear in neither subset.
vari_delay <- filter(vari, DepDelay == 1)
vari_ndelay <- filter(vari, DepDelay == 0)
# Side-by-side bar charts comparing delayed flights (left panel) with
# non-delayed flights (right panel), one pair of panels per feature.
# Bars are coloured in the order in which table() lists the categories.
season_cols <- c("pink", "red", "skyblue", "blue")
weekend_cols <- c("pink", "skyblue")
dep_group_cols <- c("pink", "skyblue", "grey")

# Season of the flight.
par(mfrow = c(1, 2))
barplot(table(vari_delay$season), main = "deley_season", col = season_cols)
barplot(table(vari_ndelay$season), main = "nondeley_season", col = season_cols)

# Weekend versus weekday.
par(mfrow = c(1, 2))
barplot(table(vari_delay$weekend), main = "deley_Weekday", col = weekend_cols)
barplot(table(vari_ndelay$weekend), main = "nondelay_Weekday", col = weekend_cols)

# Departure time band.
par(mfrow = c(1, 2))
barplot(table(vari_delay$depGr), main = "deley", col = dep_group_cols)
barplot(table(vari_ndelay$depGr), main = "nondeley", col = dep_group_cols)
# Base plots of arrival delay against individual explanatory variables.
# Layers and facets are added when a plot is printed below.
ElapsedTime <- ggplot(hflights_df, aes(DepTime, ArrDelay))
ElapsedTime1 <- ggplot(hflights_df, aes(DayOfWeek, ArrDelay))
ElapsedTime2 <- ggplot(hflights_df, aes(state, ArrDelay))
# The y aesthetic of the next two plots was misspelled `ArrpDelay`; ggplot
# only resolves column names at print time, so the typo stayed hidden until
# one of them was drawn.
ElapsedTime3 <- ggplot(hflights_df, aes(Distance, ArrDelay))
ElapsedTime4 <- ggplot(hflights_df, aes(UniqueCarrier, ArrDelay))

# Arrival delay by day of week, coloured by departure time band and labelled
# with the carrier code.
ElapsedTime1 +
  geom_point(aes(colour = depGr), size = 1) +
  geom_text(aes(label = UniqueCarrier), color = "grey35")
## Warning: Removed 3622 rows containing missing values (geom_point).
## Warning: Removed 3622 rows containing missing values (geom_text).

# Arrival delay by day of week, one panel per departure time band.
ElapsedTime1 + geom_point() + facet_grid(. ~ depGr)
## Warning: Removed 3622 rows containing missing values (geom_point).

# Arrival delay by day of week, one panel per weekend/weekday flag.
ElapsedTime1 + geom_point() + facet_grid(. ~ weekend)
## Warning: Removed 3622 rows containing missing values (geom_point).

# Arrival delay by destination state, one panel per departure time band.
ElapsedTime2 + geom_point() + facet_grid(. ~ depGr)
## Warning: Removed 3622 rows containing missing values (geom_point).
# Ordinary least squares fit of arrival delay on day of month, departure
# time, actual elapsed time, carrier and destination; the fitted call and
# coefficients are printed right after fitting.
f1 <- lm(
  ArrDelay ~ DayofMonth + DepTime + ActualElapsedTime + UniqueCarrier + Dest,
  data = hflights_df
)
print(f1)
##
## Call:
## lm(formula = ArrDelay ~ DayofMonth + DepTime + ActualElapsedTime +
## UniqueCarrier + Dest, data = hflights_df)
##
## Coefficients:
## (Intercept) DayofMonth DepTime
## -129.40736 0.06940 0.01439
## ActualElapsedTime UniqueCarrierAS UniqueCarrierB6
## 0.86753 -16.46878 -56.01842
## UniqueCarrierCO UniqueCarrierDL UniqueCarrierEV
## -3.06124 -0.17374 8.00057
## UniqueCarrierF9 UniqueCarrierFL UniqueCarrierMQ
## 6.87646 1.66187 3.32193
## UniqueCarrierOO UniqueCarrierUA UniqueCarrierUS
## 3.20883 4.67548 -5.94365
## UniqueCarrierWN UniqueCarrierXE UniqueCarrierYV
## 8.11923 3.16509 -3.60761
## DestAEX DestAGS DestAMA
## 58.15424 -0.15789 27.58522
## DestANC DestASE DestATL
## -232.17213 -21.67496 12.76687
## DestAUS DestAVL DestBFL
## 69.63988 5.80473 -92.98272
## DestBHM DestBKG DestBNA
## 28.61224 18.29574 18.01293
## DestBOS DestBPT DestBRO
## -75.32962 82.43237 49.66455
## DestBTR DestBWI DestCAE
## 56.84935 -40.05330 -0.56216
## DestCHS DestCID DestCLE
## -4.00638 0.71345 -24.85108
## DestCLT DestCMH DestCOS
## -7.23471 -20.79405 -5.40721
## DestCRP DestCRW DestCVG
## 62.30720 -8.91999 -6.67669
## DestDAL DestDAY DestDCA
## 61.06589 -8.24678 -36.49792
## DestDEN DestDFW DestDSM
## -12.10597 53.56293 1.80631
## DestDTW DestECP DestEGE
## -32.78235 34.06769 -21.69472
## DestELP DestEWR DestFLL
## 13.62431 -59.08334 -8.01598
## DestGJT DestGPT DestGRK
## -26.03751 47.02062 40.40450
## DestGRR DestGSO DestGSP
## -16.66507 -13.05644 0.20949
## DestGUC DestHDN DestHNL
## -16.80981 -35.29366 -304.28658
## DestHOB DestHRL DestHSV
## 24.21431 57.30371 22.89037
## DestIAD DestICT DestIND
## -40.72754 23.25978 -3.10310
## DestJAN DestJAX DestJFK
## 50.20447 9.55581 NA
## DestLAS DestLAX DestLBB
## -56.53602 -67.65614 34.76868
## DestLCH DestLEX DestLFT
## 59.30091 1.76444 62.26524
## DestLGA DestLIT DestLRD
## -56.77047 45.80853 46.87300
## DestMAF DestMCI DestMCO
## 38.65809 13.49694 0.44968
## DestMDW DestMEM DestMFE
## -16.02234 31.12141 47.79243
## DestMIA DestMKE DestMLU
## -10.34687 -19.55222 43.73989
## DestMOB DestMSP DestMSY
## 44.03279 -29.73266 57.34757
## DestMTJ DestOAK DestOKC
## -27.99558 -102.73337 44.81147
## DestOMA DestONT DestORD
## 0.88433 -64.93741 -11.20388
## DestORF DestPBI DestPDX
## -35.39639 -13.14363 -113.93145
## DestPHL DestPHX DestPIT
## -46.66906 -32.76640 -31.08851
## DestPNS DestPSP DestRDU
## 38.06906 -60.65856 -20.25432
## DestRIC DestRNO DestRSW
## -27.90903 -79.16155 -1.56450
## DestSAN DestSAT DestSAV
## -59.62365 63.74193 4.52944
## DestSDF DestSEA DestSFO
## -0.43665 -118.79375 -93.05168
## DestSHV DestSJC DestSJU
## 58.95550 -99.20335 -93.11053
## DestSLC DestSMF DestSNA
## -54.64950 -96.50500 -72.57247
## DestSTL DestTPA DestTUL
## 10.98552 7.82899 38.60069
## DestTUS DestTYS DestVPS
## -20.75795 9.49313 32.26251
## DestXNA
## 34.98174
# Coefficient table, residual summary and fit statistics for the model f1.
print(summary(f1))
##
## Call:
## lm(formula = ArrDelay ~ DayofMonth + DepTime + ActualElapsedTime +
## UniqueCarrier + Dest, data = hflights_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -76.36 -12.83 -5.62 2.91 978.50
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.294e+02 1.112e+00 -116.367 < 2e-16 ***
## DayofMonth 6.940e-02 6.831e-03 10.160 < 2e-16 ***
## DepTime 1.439e-02 1.393e-04 103.287 < 2e-16 ***
## ActualElapsedTime 8.675e-01 5.235e-03 165.701 < 2e-16 ***
## UniqueCarrierAS -1.647e+01 1.735e+00 -9.494 < 2e-16 ***
## UniqueCarrierB6 -5.602e+01 1.449e+00 -38.670 < 2e-16 ***
## UniqueCarrierCO -3.061e+00 6.676e-01 -4.586 4.53e-06 ***
## UniqueCarrierDL -1.737e-01 9.776e-01 -0.178 0.858942
## UniqueCarrierEV 8.001e+00 9.779e-01 8.181 2.82e-16 ***
## UniqueCarrierF9 6.876e+00 1.251e+00 5.497 3.86e-08 ***
## UniqueCarrierFL 1.662e+00 1.041e+00 1.597 0.110236
## UniqueCarrierMQ 3.322e+00 7.122e-01 4.665 3.09e-06 ***
## UniqueCarrierOO 3.209e+00 7.097e-01 4.522 6.14e-06 ***
## UniqueCarrierUA 4.675e+00 9.434e-01 4.956 7.20e-07 ***
## UniqueCarrierUS -5.944e+00 8.732e-01 -6.807 1.00e-11 ***
## UniqueCarrierWN 8.119e+00 6.887e-01 11.790 < 2e-16 ***
## UniqueCarrierXE 3.165e+00 6.880e-01 4.600 4.22e-06 ***
## UniqueCarrierYV -3.608e+00 3.304e+00 -1.092 0.274886
## DestAEX 5.815e+01 1.265e+00 45.966 < 2e-16 ***
## DestAGS -1.579e-01 2.831e+01 -0.006 0.995549
## DestAMA 2.759e+01 9.864e-01 27.966 < 2e-16 ***
## DestANC -2.322e+02 3.020e+00 -76.870 < 2e-16 ***
## DestASE -2.167e+01 2.741e+00 -7.907 2.65e-15 ***
## DestATL 1.277e+01 7.314e-01 17.454 < 2e-16 ***
## DestAUS 6.964e+01 7.965e-01 87.430 < 2e-16 ***
## DestAVL 5.805e+00 1.619e+00 3.585 0.000338 ***
## DestBFL -9.298e+01 1.452e+00 -64.035 < 2e-16 ***
## DestBHM 2.861e+01 7.907e-01 36.187 < 2e-16 ***
## DestBKG 1.830e+01 2.908e+00 6.291 3.16e-10 ***
## DestBNA 1.801e+01 7.337e-01 24.552 < 2e-16 ***
## DestBOS -7.533e+01 1.013e+00 -74.330 < 2e-16 ***
## DestBPT 8.243e+01 1.635e+01 5.040 4.65e-07 ***
## DestBRO 4.966e+01 9.441e-01 52.607 < 2e-16 ***
## DestBTR 5.685e+01 9.497e-01 59.863 < 2e-16 ***
## DestBWI -4.005e+01 8.276e-01 -48.397 < 2e-16 ***
## DestCAE -5.622e-01 1.335e+00 -0.421 0.673678
## DestCHS -4.006e+00 9.871e-01 -4.059 4.94e-05 ***
## DestCID 7.134e-01 1.511e+00 0.472 0.636811
## DestCLE -2.485e+01 8.529e-01 -29.136 < 2e-16 ***
## DestCLT -7.235e+00 7.373e-01 -9.813 < 2e-16 ***
## DestCMH -2.079e+01 9.545e-01 -21.786 < 2e-16 ***
## DestCOS -5.407e+00 8.991e-01 -6.014 1.82e-09 ***
## DestCRP 6.231e+01 7.839e-01 79.488 < 2e-16 ***
## DestCRW -8.920e+00 1.624e+00 -5.491 3.99e-08 ***
## DestCVG -6.677e+00 9.097e-01 -7.339 2.16e-13 ***
## DestDAL 6.107e+01 7.168e-01 85.196 < 2e-16 ***
## DestDAY -8.247e+00 1.455e+00 -5.668 1.45e-08 ***
## DestDCA -3.650e+01 8.254e-01 -44.217 < 2e-16 ***
## DestDEN -1.211e+01 6.931e-01 -17.465 < 2e-16 ***
## DestDFW 5.356e+01 7.892e-01 67.873 < 2e-16 ***
## DestDSM 1.806e+00 1.250e+00 1.445 0.148366
## DestDTW -3.278e+01 8.411e-01 -38.976 < 2e-16 ***
## DestECP 3.407e+01 1.206e+00 28.256 < 2e-16 ***
## DestEGE -2.169e+01 2.829e+00 -7.669 1.74e-14 ***
## DestELP 1.362e+01 7.483e-01 18.207 < 2e-16 ***
## DestEWR -5.908e+01 8.163e-01 -72.376 < 2e-16 ***
## DestFLL -8.016e+00 8.024e-01 -9.990 < 2e-16 ***
## DestGJT -2.604e+01 1.527e+00 -17.053 < 2e-16 ***
## DestGPT 4.702e+01 9.485e-01 49.575 < 2e-16 ***
## DestGRK 4.040e+01 4.526e+00 8.928 < 2e-16 ***
## DestGRR -1.667e+01 1.234e+00 -13.503 < 2e-16 ***
## DestGSO -1.306e+01 1.268e+00 -10.295 < 2e-16 ***
## DestGSP 2.095e-01 1.009e+00 0.208 0.835446
## DestGUC -1.681e+01 3.127e+00 -5.376 7.60e-08 ***
## DestHDN -3.529e+01 2.843e+00 -12.415 < 2e-16 ***
## DestHNL -3.043e+02 2.453e+00 -124.036 < 2e-16 ***
## DestHOB 2.421e+01 1.750e+00 13.841 < 2e-16 ***
## DestHRL 5.730e+01 7.895e-01 72.582 < 2e-16 ***
## DestHSV 2.289e+01 1.100e+00 20.818 < 2e-16 ***
## DestIAD -4.073e+01 8.890e-01 -45.812 < 2e-16 ***
## DestICT 2.326e+01 9.362e-01 24.844 < 2e-16 ***
## DestIND -3.103e+00 8.732e-01 -3.554 0.000380 ***
## DestJAN 5.020e+01 8.866e-01 56.626 < 2e-16 ***
## DestJAX 9.556e+00 8.239e-01 11.598 < 2e-16 ***
## DestJFK NA NA NA NA
## DestLAS -5.654e+01 7.886e-01 -71.687 < 2e-16 ***
## DestLAX -6.766e+01 7.986e-01 -84.718 < 2e-16 ***
## DestLBB 3.477e+01 9.827e-01 35.380 < 2e-16 ***
## DestLCH 5.930e+01 1.664e+00 35.639 < 2e-16 ***
## DestLEX 1.764e+00 1.304e+00 1.354 0.175888
## DestLFT 6.227e+01 9.080e-01 68.575 < 2e-16 ***
## DestLGA -5.677e+01 8.878e-01 -63.949 < 2e-16 ***
## DestLIT 4.581e+01 9.451e-01 48.469 < 2e-16 ***
## DestLRD 4.687e+01 1.048e+00 44.708 < 2e-16 ***
## DestMAF 3.866e+01 8.342e-01 46.342 < 2e-16 ***
## DestMCI 1.350e+01 7.475e-01 18.056 < 2e-16 ***
## DestMCO 4.497e-01 7.254e-01 0.620 0.535309
## DestMDW -1.602e+01 8.384e-01 -19.110 < 2e-16 ***
## DestMEM 3.112e+01 8.499e-01 36.619 < 2e-16 ***
## DestMFE 4.779e+01 1.057e+00 45.225 < 2e-16 ***
## DestMIA -1.035e+01 8.626e-01 -11.995 < 2e-16 ***
## DestMKE -1.955e+01 9.097e-01 -21.493 < 2e-16 ***
## DestMLU 4.374e+01 1.794e+00 24.386 < 2e-16 ***
## DestMOB 4.403e+01 9.310e-01 47.297 < 2e-16 ***
## DestMSP -2.973e+01 8.568e-01 -34.702 < 2e-16 ***
## DestMSY 5.735e+01 7.341e-01 78.117 < 2e-16 ***
## DestMTJ -2.800e+01 2.296e+00 -12.195 < 2e-16 ***
## DestOAK -1.027e+02 1.364e+00 -75.314 < 2e-16 ***
## DestOKC 4.481e+01 7.819e-01 57.311 < 2e-16 ***
## DestOMA 8.843e-01 8.307e-01 1.065 0.287095
## DestONT -6.494e+01 1.147e+00 -56.619 < 2e-16 ***
## DestORD -1.120e+01 7.014e-01 -15.975 < 2e-16 ***
## DestORF -3.540e+01 1.210e+00 -29.259 < 2e-16 ***
## DestPBI -1.314e+01 9.759e-01 -13.468 < 2e-16 ***
## DestPDX -1.139e+02 1.216e+00 -93.667 < 2e-16 ***
## DestPHL -4.667e+01 8.745e-01 -53.364 < 2e-16 ***
## DestPHX -3.277e+01 7.234e-01 -45.295 < 2e-16 ***
## DestPIT -3.109e+01 9.043e-01 -34.379 < 2e-16 ***
## DestPNS 3.807e+01 9.432e-01 40.360 < 2e-16 ***
## DestPSP -6.066e+01 2.833e+00 -21.410 < 2e-16 ***
## DestRDU -2.025e+01 8.823e-01 -22.955 < 2e-16 ***
## DestRIC -2.791e+01 1.113e+00 -25.078 < 2e-16 ***
## DestRNO -7.916e+01 1.972e+00 -40.139 < 2e-16 ***
## DestRSW -1.564e+00 1.076e+00 -1.453 0.146126
## DestSAN -5.962e+01 8.458e-01 -70.497 < 2e-16 ***
## DestSAT 6.374e+01 7.841e-01 81.295 < 2e-16 ***
## DestSAV 4.529e+00 1.118e+00 4.052 5.09e-05 ***
## DestSDF -4.366e-01 9.693e-01 -0.450 0.652362
## DestSEA -1.188e+02 1.119e+00 -106.168 < 2e-16 ***
## DestSFO -9.305e+01 1.011e+00 -92.030 < 2e-16 ***
## DestSHV 5.896e+01 1.227e+00 48.062 < 2e-16 ***
## DestSJC -9.920e+01 1.253e+00 -79.159 < 2e-16 ***
## DestSJU -9.311e+01 1.678e+00 -55.482 < 2e-16 ***
## DestSLC -5.465e+01 8.911e-01 -61.325 < 2e-16 ***
## DestSMF -9.650e+01 1.207e+00 -79.970 < 2e-16 ***
## DestSNA -7.257e+01 9.936e-01 -73.040 < 2e-16 ***
## DestSTL 1.099e+01 7.866e-01 13.965 < 2e-16 ***
## DestTPA 7.829e+00 7.560e-01 10.355 < 2e-16 ***
## DestTUL 3.860e+01 7.925e-01 48.710 < 2e-16 ***
## DestTUS -2.076e+01 9.142e-01 -22.706 < 2e-16 ***
## DestTYS 9.493e+00 9.896e-01 9.593 < 2e-16 ***
## DestVPS 3.226e+01 1.126e+00 28.651 < 2e-16 ***
## DestXNA 3.498e+01 1.035e+00 33.808 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28.3 on 223742 degrees of freedom
## (3622 observations deleted due to missingness)
## Multiple R-squared: 0.1512, Adjusted R-squared: 0.1507
## F-statistic: 304.2 on 131 and 223742 DF, p-value: < 2.2e-16
# Draw the diagnostic plots produced by plot() for the fitted model f1 on a
# single 2 x 2 page.
par(mfrow = c(2, 2))
plot(f1)
## Warning: not plotting observations with leverage one:
## 59275
## Warning: not plotting observations with leverage one:
## 59275