#install.packages("hflights")
library("hflights")
## Warning: package 'hflights' was built under R version 3.3.2
# str() (short for "structure") prints a compact overview of an object.
# For a data frame it reports the number of rows and columns, followed by
# each column's name, type and first few values. See ?str for details.
str(object = hflights)
## 'data.frame': 227496 obs. of 21 variables:
## $ Year : int 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
## $ Month : int 1 1 1 1 1 1 1 1 1 1 ...
## $ DayofMonth : int 1 2 3 4 5 6 7 8 9 10 ...
## $ DayOfWeek : int 6 7 1 2 3 4 5 6 7 1 ...
## $ DepTime : int 1400 1401 1352 1403 1405 1359 1359 1355 1443 1443 ...
## $ ArrTime : int 1500 1501 1502 1513 1507 1503 1509 1454 1554 1553 ...
## $ UniqueCarrier : chr "AA" "AA" "AA" "AA" ...
## $ FlightNum : int 428 428 428 428 428 428 428 428 428 428 ...
## $ TailNum : chr "N576AA" "N557AA" "N541AA" "N403AA" ...
## $ ActualElapsedTime: int 60 60 70 70 62 64 70 59 71 70 ...
## $ AirTime : int 40 45 48 39 44 45 43 40 41 45 ...
## $ ArrDelay : int -10 -9 -8 3 -3 -7 -1 -16 44 43 ...
## $ DepDelay : int 0 1 -8 3 5 -1 -1 -5 43 43 ...
## $ Origin : chr "IAH" "IAH" "IAH" "IAH" ...
## $ Dest : chr "DFW" "DFW" "DFW" "DFW" ...
## $ Distance : int 224 224 224 224 224 224 224 224 224 224 ...
## $ TaxiIn : int 7 6 5 9 9 6 12 7 8 6 ...
## $ TaxiOut : int 13 9 17 22 9 13 15 12 22 19 ...
## $ Cancelled : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CancellationCode : chr "" "" "" "" ...
## $ Diverted : int 0 0 0 0 0 0 0 0 0 0 ...
# table() tallies how many rows share each distinct value of a column,
# much like GROUP BY + COUNT(*) in SQL. Here: one count per Dest code.
CountOfDest <- table(hflights[["Dest"]])
# Typing the name on its own prints the whole frequency table.
CountOfDest
##
## ABQ AEX AGS AMA ANC ASE ATL AUS AVL BFL BHM BKG BNA BOS BPT
## 2812 724 1 1297 125 125 7886 5022 350 504 2736 110 3481 1752 3
## BRO BTR BWI CAE CHS CID CLE CLT CMH COS CRP CRW CVG DAL DAY
## 1692 1762 2551 561 1200 410 2140 4735 1348 1657 4813 357 1535 9820 451
## DCA DEN DFW DSM DTW ECP EGE ELP EWR FLL GJT GPT GRK GRR GSO
## 2699 5920 6653 647 2601 729 110 3036 4314 2462 403 1618 42 677 630
## GSP GUC HDN HNL HOB HRL HSV IAD ICT IND JAN JAX JFK LAS LAX
## 1123 86 110 402 309 3983 923 1980 1517 1750 2011 2135 695 4082 6064
## LBB LCH LEX LFT LGA LIT LRD MAF MCI MCO MDW MEM MFE MIA MKE
## 1333 364 584 2313 2730 1579 1188 2306 3174 3687 2094 2399 1128 2463 1588
## MLU MOB MSP MSY MTJ OAK OKC OMA ONT ORD ORF PBI PDX PHL PHX
## 292 1674 2010 6823 164 690 3170 2044 952 5748 717 1253 1235 2367 5096
## PIT PNS PSP RDU RIC RNO RSW SAN SAT SAV SDF SEA SFO SHV SJC
## 1664 1539 106 1740 900 243 948 2936 4893 863 1279 2615 2818 787 885
## SJU SLC SMF SNA STL TPA TUL TUS TYS VPS XNA
## 391 2033 1014 1661 2509 3085 2924 1565 1210 880 1172
# length() returns the number of elements in an object. For this
# one-dimensional table that is the number of distinct Dest values.
length(x = CountOfDest)
## [1] 116
# length() works on plain vectors too: it returns the element count.
a <- c(1, 2, 4)
length(a)
## [1] 3
# range() returns the smallest and the largest value of its input.
range(a)
## [1] 1 4
# For character vectors the comparison follows string sort order,
# not numeric value.
b <- c("c2", "33", "4a")
range(b)
## [1] "33" "c2"
# A logical condition inside [ ] keeps only the elements for which it is
# TRUE, comparable to a WHERE clause in SQL. Here every destination with a
# count of 3 or more is kept, so AGS (count 1) drops out of the result.
CountOfDest[CountOfDest >= 3]
##
## ABQ AEX AMA ANC ASE ATL AUS AVL BFL BHM BKG BNA BOS BPT BRO
## 2812 724 1297 125 125 7886 5022 350 504 2736 110 3481 1752 3 1692
## BTR BWI CAE CHS CID CLE CLT CMH COS CRP CRW CVG DAL DAY DCA
## 1762 2551 561 1200 410 2140 4735 1348 1657 4813 357 1535 9820 451 2699
## DEN DFW DSM DTW ECP EGE ELP EWR FLL GJT GPT GRK GRR GSO GSP
## 5920 6653 647 2601 729 110 3036 4314 2462 403 1618 42 677 630 1123
## GUC HDN HNL HOB HRL HSV IAD ICT IND JAN JAX JFK LAS LAX LBB
## 86 110 402 309 3983 923 1980 1517 1750 2011 2135 695 4082 6064 1333
## LCH LEX LFT LGA LIT LRD MAF MCI MCO MDW MEM MFE MIA MKE MLU
## 364 584 2313 2730 1579 1188 2306 3174 3687 2094 2399 1128 2463 1588 292
## MOB MSP MSY MTJ OAK OKC OMA ONT ORD ORF PBI PDX PHL PHX PIT
## 1674 2010 6823 164 690 3170 2044 952 5748 717 1253 1235 2367 5096 1664
## PNS PSP RDU RIC RNO RSW SAN SAT SAV SDF SEA SFO SHV SJC SJU
## 1539 106 1740 900 243 948 2936 4893 863 1279 2615 2818 787 885 391
## SLC SMF SNA STL TPA TUL TUS TYS VPS XNA
## 2033 1014 1661 2509 3085 2924 1565 1210 880 1172
# Keep only the destinations whose count exceeds 6000.
# Uses `<-` for assignment, matching the rest of this script.
SelectedDest <- CountOfDest[CountOfDest > 6000]
# addmargins() appends a "Sum" cell holding the total of the kept counts.
addmargins(SelectedDest)
##
## ATL DAL DFW LAX MSY Sum
## 7886 9820 6653 6064 6823 37246
# Draw one bar per selected destination, with bar height equal to its count.
barplot(SelectedDest)
