# One-time setup: uncomment the next line if the data package is not installed.
# install.packages("hflights")
library(hflights)
## Warning: package 'hflights' was built under R version 3.3.2
# Compact overview of the data set: its dimensions, then one line per column
# with the column type and the first few values.
str(hflights)
## 'data.frame':    227496 obs. of  21 variables:
##  $ Year             : int  2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
##  $ Month            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ DayofMonth       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ DayOfWeek        : int  6 7 1 2 3 4 5 6 7 1 ...
##  $ DepTime          : int  1400 1401 1352 1403 1405 1359 1359 1355 1443 1443 ...
##  $ ArrTime          : int  1500 1501 1502 1513 1507 1503 1509 1454 1554 1553 ...
##  $ UniqueCarrier    : chr  "AA" "AA" "AA" "AA" ...
##  $ FlightNum        : int  428 428 428 428 428 428 428 428 428 428 ...
##  $ TailNum          : chr  "N576AA" "N557AA" "N541AA" "N403AA" ...
##  $ ActualElapsedTime: int  60 60 70 70 62 64 70 59 71 70 ...
##  $ AirTime          : int  40 45 48 39 44 45 43 40 41 45 ...
##  $ ArrDelay         : int  -10 -9 -8 3 -3 -7 -1 -16 44 43 ...
##  $ DepDelay         : int  0 1 -8 3 5 -1 -1 -5 43 43 ...
##  $ Origin           : chr  "IAH" "IAH" "IAH" "IAH" ...
##  $ Dest             : chr  "DFW" "DFW" "DFW" "DFW" ...
##  $ Distance         : int  224 224 224 224 224 224 224 224 224 224 ...
##  $ TaxiIn           : int  7 6 5 9 9 6 12 7 8 6 ...
##  $ TaxiOut          : int  13 9 17 22 9 13 15 12 22 19 ...
##  $ Cancelled        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ CancellationCode : chr  "" "" "" "" ...
##  $ Diverted         : int  0 0 0 0 0 0 0 0 0 0 ...
# ?str  -- str() compactly displays the structure of an R object (here, the data frame); the name is short for "structure".

# Tally the flights per destination: table() counts how many times each
# distinct value of the Dest column occurs (similar to a SQL GROUP BY count).
CountOfDest <- table(hflights[["Dest"]])
CountOfDest
## 
##  ABQ  AEX  AGS  AMA  ANC  ASE  ATL  AUS  AVL  BFL  BHM  BKG  BNA  BOS  BPT 
## 2812  724    1 1297  125  125 7886 5022  350  504 2736  110 3481 1752    3 
##  BRO  BTR  BWI  CAE  CHS  CID  CLE  CLT  CMH  COS  CRP  CRW  CVG  DAL  DAY 
## 1692 1762 2551  561 1200  410 2140 4735 1348 1657 4813  357 1535 9820  451 
##  DCA  DEN  DFW  DSM  DTW  ECP  EGE  ELP  EWR  FLL  GJT  GPT  GRK  GRR  GSO 
## 2699 5920 6653  647 2601  729  110 3036 4314 2462  403 1618   42  677  630 
##  GSP  GUC  HDN  HNL  HOB  HRL  HSV  IAD  ICT  IND  JAN  JAX  JFK  LAS  LAX 
## 1123   86  110  402  309 3983  923 1980 1517 1750 2011 2135  695 4082 6064 
##  LBB  LCH  LEX  LFT  LGA  LIT  LRD  MAF  MCI  MCO  MDW  MEM  MFE  MIA  MKE 
## 1333  364  584 2313 2730 1579 1188 2306 3174 3687 2094 2399 1128 2463 1588 
##  MLU  MOB  MSP  MSY  MTJ  OAK  OKC  OMA  ONT  ORD  ORF  PBI  PDX  PHL  PHX 
##  292 1674 2010 6823  164  690 3170 2044  952 5748  717 1253 1235 2367 5096 
##  PIT  PNS  PSP  RDU  RIC  RNO  RSW  SAN  SAT  SAV  SDF  SEA  SFO  SHV  SJC 
## 1664 1539  106 1740  900  243  948 2936 4893  863 1279 2615 2818  787  885 
##  SJU  SLC  SMF  SNA  STL  TPA  TUL  TUS  TYS  VPS  XNA 
##  391 2033 1014 1661 2509 3085 2924 1565 1210  880 1172
# length() returns the number of elements -- for this table, the number of
# distinct destinations. It works the same way on plain vectors.
length(CountOfDest)
## [1] 116
# A small numeric vector to try length() and range() on.
a <- c(1, 2, 4)
length(a)
## [1] 3
# range() returns the minimum and the maximum as a two-element vector.
range(a)
## [1] 1 4
# On a character vector the comparison uses string sort order rather than
# numeric value, so here "33" is the minimum and "c2" the maximum.
b <- c("c2", "33", "4a")
range(b)
## [1] "33" "c2"
# Indexing with [ ] and a logical condition keeps only the entries for which
# the condition is TRUE, much like a WHERE clause in SQL. CountOfDest is a
# table (not a data frame), so this filters its counts: here, destinations
# with at least 3 flights.
CountOfDest[CountOfDest >= 3]
## 
##  ABQ  AEX  AMA  ANC  ASE  ATL  AUS  AVL  BFL  BHM  BKG  BNA  BOS  BPT  BRO 
## 2812  724 1297  125  125 7886 5022  350  504 2736  110 3481 1752    3 1692 
##  BTR  BWI  CAE  CHS  CID  CLE  CLT  CMH  COS  CRP  CRW  CVG  DAL  DAY  DCA 
## 1762 2551  561 1200  410 2140 4735 1348 1657 4813  357 1535 9820  451 2699 
##  DEN  DFW  DSM  DTW  ECP  EGE  ELP  EWR  FLL  GJT  GPT  GRK  GRR  GSO  GSP 
## 5920 6653  647 2601  729  110 3036 4314 2462  403 1618   42  677  630 1123 
##  GUC  HDN  HNL  HOB  HRL  HSV  IAD  ICT  IND  JAN  JAX  JFK  LAS  LAX  LBB 
##   86  110  402  309 3983  923 1980 1517 1750 2011 2135  695 4082 6064 1333 
##  LCH  LEX  LFT  LGA  LIT  LRD  MAF  MCI  MCO  MDW  MEM  MFE  MIA  MKE  MLU 
##  364  584 2313 2730 1579 1188 2306 3174 3687 2094 2399 1128 2463 1588  292 
##  MOB  MSP  MSY  MTJ  OAK  OKC  OMA  ONT  ORD  ORF  PBI  PDX  PHL  PHX  PIT 
## 1674 2010 6823  164  690 3170 2044  952 5748  717 1253 1235 2367 5096 1664 
##  PNS  PSP  RDU  RIC  RNO  RSW  SAN  SAT  SAV  SDF  SEA  SFO  SHV  SJC  SJU 
## 1539  106 1740  900  243  948 2936 4893  863 1279 2615 2818  787  885  391 
##  SLC  SMF  SNA  STL  TPA  TUL  TUS  TYS  VPS  XNA 
## 2033 1014 1661 2509 3085 2924 1565 1210  880 1172
# Keep only the busiest destinations: those with more than 6000 flights.
SelectedDest <- CountOfDest[CountOfDest > 6000]
# addmargins() appends a "Sum" entry holding the total of the selected counts.
addmargins(SelectedDest)
## 
##   ATL   DAL   DFW   LAX   MSY   Sum 
##  7886  9820  6653  6064  6823 37246
# Draw one bar per selected destination, with bar height equal to its count.
barplot(SelectedDest)