# Point the session at the Hadoop launcher and the streaming jar.
# Both variables are set in a single call, ahead of the library() calls that
# follow (rhdfs echoes HADOOP_CMD when it is loaded).
Sys.setenv(
  HADOOP_CMD = "/usr/bin/hadoop",
  HADOOP_STREAMING = "/usr/hdp/2.3.0.0-2557/hadoop-mapreduce/hadoop-streaming-2.7.1.2.3.0.0-2557.jar"
)

library(rhdfs)
## Loading required package: rJava
## 
## HADOOP_CMD=/usr/bin/hadoop
## 
## Be sure to run hdfs.init()
library(rmr2)
## Warning: S3 methods 'gorder.default', 'gorder.factor',
## 'gorder.data.frame', 'gorder.matrix', 'gorder.raw' were declared in
## NAMESPACE but not found
## Please review your hadoop settings. See help(hadoop.settings)
# Open the connection from this R session to HDFS.
hdfs.init()

# Input format used by the jobs below: comma-separated text.
csv.input.format <- make.input.format(format = "csv", mode = "text", sep = ",")

# HDFS directory that holds the input file and receives the job outputs.
hdfs.root <- "/user/share/student"
# Full HDFS path of the input data file.
hdfs.data <- file.path(hdfs.root, "test_25K.csv")



#### Problem 1 ####
# Map step for Problem 1: emit one (origin, cancelled flag) pair per record.
#
# Args:
#   k:     Key supplied with the input chunk; not used.
#   lines: Chunk of parsed CSV records, addressed by column position.
#          Column 17 is taken as the origin and column 22 as the cancellation
#          flag (presumably the airline on-time layout -- confirm against the
#          input file).
#
# Returns:
#   keyval() pairing column 17 with column 22 converted to numeric.
plane_origin_canceled_map <- function(k, lines) {
  origin <- lines[[17]]
  canceled <- lines[[22]]
  # as.numeric() on a factor returns the internal level codes, not the printed
  # values, so go through character first if the reader produced a factor.
  if (is.factor(canceled)) {
    canceled <- as.character(canceled)
  }
  keyval(origin, as.numeric(canceled))
}

# Reduce step for Problem 1: total the cancellation flags for one origin.
#
# Args:
#   origin: Key emitted by the map step.
#   counts: Numeric values emitted for that key; NA entries are ignored.
#
# Returns:
#   keyval() of the key and the NA-excluding sum of its values.
plane_origin_canceled_reduce <- function(origin, counts) {
  total_canceled <- sum(counts, na.rm = TRUE)
  keyval(origin, total_canceled)
}

# Run the Problem 1 job: cancellation totals keyed by origin.
#
# Args:
#   origin_canceled_input:  Input passed to mapreduce() (an HDFS path in this
#                           script).
#   origin_canceled_output: Output location passed to mapreduce(); defaults to
#                           NULL.
#
# Returns:
#   Whatever mapreduce() returns for the job.
plane_origin_canceled <- function(origin_canceled_input,
                                  origin_canceled_output = NULL) {
  mapreduce(
    map = plane_origin_canceled_map,
    reduce = plane_origin_canceled_reduce,
    input = origin_canceled_input,
    input.format = csv.input.format,
    output = origin_canceled_output
  )
}

# Give every run its own output directory so old results in HDFS never have to
# be deleted by hand. The stamp goes down to the second: with only minute
# resolution, two runs started within the same minute would target the same
# path.
outfile_origin_canceled <- paste(
  "origin_canceled",
  format(Sys.time(), "%b_%d_%Y_%H_%M_%S%p"),
  sep = "_"
)
# Append the output name to the HDFS root directory.
hdfs_origin_canceled.out <- file.path(hdfs.root, outfile_origin_canceled)

# Run the job, then pull its key/value output back into the R session.
out_origin_canceled <- plane_origin_canceled(hdfs.data, hdfs_origin_canceled.out)

results_origin_canceled <- as.data.frame(
  from.dfs(out_origin_canceled),
  stringsAsFactors = FALSE
)
colnames(results_origin_canceled) <- c("Origin", "Canceled")

print(results_origin_canceled)
##     Origin Canceled
## 1      ABE        0
## 2      ABI        1
## 3      ABQ        1
## 4      ABY        1
## 5      ACK        0
## 6      ACT        0
## 7      ACV        1
## 8      ACY        0
## 9      ADQ        1
## 10     AEX        0
## 11     AGS        2
## 12     ALB        1
## 13     AMA        0
## 14     ANC        1
## 15     ATL       26
## 16     ATW        0
## 17     AUS        4
## 18     AVL        1
## 19     AVP        0
## 20     AZO        1
## 21     BDL        0
## 22     BET        1
## 23     BFL        0
## 24     BGM        0
## 25     BGR        1
## 26     BHM        0
## 27     BIL        0
## 28     BIS        0
## 29     BMI        0
## 30     BNA        4
## 31     BOI        0
## 32     BOS       22
## 33     BPT        0
## 34     BQK        0
## 35     BQN        0
## 36     BRO        0
## 37     BRW        0
## 38     BTM        0
## 39     BTR        0
## 40     BTV        1
## 41     BUF        1
## 42     BUR        1
## 43     BWI        6
## 44     BZN        0
## 45     CAE        1
## 46     CAK        0
## 47     CDC        0
## 48     CDV        0
## 49     CEC        1
## 50     CHA        1
## 51     CHO        0
## 52     CHS        4
## 53     CIC        0
## 54     CID        2
## 55     CLD        0
## 56     CLE        3
## 57     CLL        0
## 58     CLT        5
## 59     CMH        5
## 60     CMI        0
## 61     COD        1
## 62     COS        1
## 63     CPR        0
## 64     CRP        0
## 65     CRW        0
## 66     CSG        1
## 67     CVG       24
## 68     DAB        2
## 69     DAL        7
## 70     DAY        2
## 71     DBQ        1
## 72     DCA        6
## 73     DEN        6
## 74     DFW       11
## 75     DHN        1
## 76     DLH        0
## 77     DRO        0
## 78     DSM        1
## 79     DTW        6
## 80     EFD        0
## 81     EGE        0
## 82     EKO        0
## 83     ELP        1
## 84     ERI        0
## 85     EUG        0
## 86     EVV        0
## 87     EWR       16
## 88     EYW        0
## 89     FAI        2
## 90     FAR        0
## 91     FAT        1
## 92     FAY        0
## 93     FCA        0
## 94     FLL        2
## 95     FNT        0
## 96     FSD        2
## 97     FSM        0
## 98     FWA        0
## 99     GEG        0
## 100    GFK        0
## 101    GGG        0
## 102    GJT        0
## 103    GNV        0
## 104    GPT        0
## 105    GRB        0
## 106    GRK        0
## 107    GRR        0
## 108    GSO        4
## 109    GSP        5
## 110    GST        0
## 111    GTF        0
## 112    GTR        1
## 113    GUC        0
## 114    HDN        0
## 115    HLN        0
## 116    HNL        0
## 117    HOU       12
## 118    HPN        0
## 119    HRL        0
## 120    HSV        0
## 121    HTS        1
## 122    HVN        1
## 123    IAD       12
## 124    IAH        7
## 125    ICT        2
## 126    IDA        0
## 127    ILE        0
## 128    ILM        0
## 129    IND        3
## 130    IPL        0
## 131    ISP        1
## 132    ITO        0
## 133    IYK        0
## 134    JAC        1
## 135    JAN        0
## 136    JAX        4
## 137    JFK        6
## 138    JNU        1
## 139    KOA        0
## 140    KTN        0
## 141    LAN        0
## 142    LAS        4
## 143    LAW        0
## 144    LAX        9
## 145    LBB        1
## 146    LCH        0
## 147    LEX        2
## 148    LFT        0
## 149    LGA        7
## 150    LGB        0
## 151    LIH        0
## 152    LIT        0
## 153    LNK        0
## 154    LNY        0
## 155    LRD        0
## 156    LSE        0
## 157    LWB        0
## 158    LYH        0
## 159    MAF        0
## 160    MBS        0
## 161    MCI        2
## 162    MCN        0
## 163    MCO        8
## 164    MDT        0
## 165    MDW        1
## 166    MEI        0
## 167    MEM        0
## 168    MFE        0
## 169    MFR        0
## 170    MGM        0
## 171    MHT        0
## 172    MIA        5
## 173    MKE        1
## 174    MKK        0
## 175    MLB        1
## 176    MLI        0
## 177    MLU        0
## 178    MOB        0
## 179    MOD        0
## 180    MOT        0
## 181    MQT        0
## 182    MRY        1
## 183    MSN        0
## 184    MSO        1
## 185    MSP       11
## 186    MSY        4
## 187    MTJ        0
## 188    MYR        2
## 189    OAK        1
## 190    OGG        0
## 191    OKC        2
## 192    OMA        2
## 193    OME        1
## 194    ONT        1
## 195    ORD       45
## 196    ORF        1
## 197    OTZ        0
## 198    OXR        0
## 199    PBI        0
## 200    PDX        3
## 201    PFN        0
## 202    PHF        0
## 203    PHL        6
## 204    PHX        7
## 205    PIA        0
## 206    PIE        0
## 207    PIH        0
## 208    PIT        2
## 209    PNS        0
## 210    PSC        0
## 211    PSG        0
## 212    PSP        1
## 213    PVD        1
## 214    PWM        2
## 215    RAP        0
## 216    RDD        0
## 217    RDM        0
## 218    RDU        6
## 219    RIC        3
## 220    RNO        1
## 221    ROA        2
## 222    ROC        2
## 223    RST        0
## 224    RSW        2
## 225    SAN        6
## 226    SAT        3
## 227    SAV        1
## 228    SBA        0
## 229    SBN        0
## 230    SBP        0
## 231    SCC        0
## 232    SCE        1
## 233    SDF        0
## 234    SEA        5
## 235    SFO        4
## 236    SGF        0
## 237    SGU        0
## 238    SHV        0
## 239    SIT        0
## 240    SJC        3
## 241    SJT        0
## 242    SJU        3
## 243    SLC        6
## 244    SMF        2
## 245    SMX        0
## 246    SNA        4
## 247    SPS        0
## 248    SRQ        1
## 249    STL        3
## 250    STT        1
## 251    STX        0
## 252    SUN        0
## 253    SWF        0
## 254    SYR        0
## 255    TLH        2
## 256    TOL        0
## 257    TPA        8
## 258    TRI        0
## 259    TUL        2
## 260    TUS        0
## 261    TVC        1
## 262    TWF        0
## 263    TXK        0
## 264    TYR        0
## 265    TYS        3
## 266    VCT        0
## 267    VIS        0
## 268    VLD        0
## 269    VPS        0
## 270    WRG        0
## 271    XNA        3
## 272    YAK        0
## 273    YUM        0
#### Problem 2 ####
# Map step for Problem 2: emit one (destination, taxi-in value) pair per record.
#
# Args:
#   k:     Key supplied with the input chunk; not used.
#   lines: Chunk of parsed CSV records, addressed by column position.
#          Column 18 is taken as the destination and column 20 as the taxi-in
#          value (presumably the airline on-time layout -- confirm against the
#          input file).
#
# Returns:
#   keyval() pairing column 18 with column 20 converted to numeric.
plane_dest_taxi_map <- function(k, lines) {
  destination <- lines[[18]]
  taxi_in <- lines[[20]]
  # as.numeric() on a factor returns the internal level codes, not the printed
  # values, so go through character first if the reader produced a factor.
  if (is.factor(taxi_in)) {
    taxi_in <- as.character(taxi_in)
  }
  keyval(destination, as.numeric(taxi_in))
}

# Reduce step for Problem 2: average the taxi-in values for one key.
#
# Args:
#   origin: Key emitted by the map step. Despite the parameter name, the
#           paired map function emits the destination column as the key.
#   counts: Numeric values emitted for that key; NA entries are ignored.
#
# Returns:
#   keyval() of the key and the NA-excluding mean of its values.
plane_dest_taxi_reduce <- function(origin, counts) {
  mean_taxi_in <- mean(counts, na.rm = TRUE)
  keyval(origin, mean_taxi_in)
}

# Run the Problem 2 job: mean taxi-in value keyed by destination.
#
# The parameter names still say "origin_canceled"; they are kept unchanged so
# existing calls continue to work.
#
# Args:
#   origin_canceled_input:  Input passed to mapreduce() (an HDFS path in this
#                           script).
#   origin_canceled_output: Output location passed to mapreduce(); defaults to
#                           NULL.
#
# Returns:
#   Whatever mapreduce() returns for the job.
plane_dest_taxi <- function(origin_canceled_input,
                            origin_canceled_output = NULL) {
  mapreduce(
    map = plane_dest_taxi_map,
    reduce = plane_dest_taxi_reduce,
    input = origin_canceled_input,
    input.format = csv.input.format,
    output = origin_canceled_output
  )
}

# Give every run its own output directory so old results in HDFS never have to
# be deleted by hand. The stamp goes down to the second: with only minute
# resolution, two runs started within the same minute would target the same
# path.
outfile_dest_taxi <- paste(
  "dest_taxi",
  format(Sys.time(), "%b_%d_%Y_%H_%M_%S%p"),
  sep = "_"
)
# Append the output name to the HDFS root directory.
hdfs_dest_taxi.out <- file.path(hdfs.root, outfile_dest_taxi)

# Run the job, then pull its key/value output back into the R session.
out_dest_taxi <- plane_dest_taxi(hdfs.data, hdfs_dest_taxi.out)

results_dest_taxi <- as.data.frame(
  from.dfs(out_dest_taxi),
  stringsAsFactors = FALSE
)
colnames(results_dest_taxi) <- c("Destination", "TaxiIN")

print(results_dest_taxi)
##     Destination     TaxiIN
## 1           ABE   3.904762
## 2           ABI   2.333333
## 3           ABQ   4.633803
## 4           ABY   3.333333
## 5           ACK   6.000000
## 6           ACT   4.250000
## 7           ACV   2.636364
## 8           ACY   3.000000
## 9           ADQ   2.500000
## 10          AEX   3.363636
## 11          AGS   2.615385
## 12          AKN   2.500000
## 13          ALB   3.575758
## 14          AMA   3.966667
## 15          ANC   4.697368
## 16          ATL  14.749306
## 17          ATW   3.666667
## 18          AUS   4.492857
## 19          AVL   2.444444
## 20          AVP   3.307692
## 21          AZO   3.312500
## 22          BDL   4.760870
## 23          BET   2.250000
## 24          BFL   5.666667
## 25          BGM  14.600000
## 26          BGR   4.000000
## 27          BHM   3.734177
## 28          BIL   4.550000
## 29          BIS   4.000000
## 30          BMI   3.833333
## 31          BNA   5.080402
## 32          BOI   3.593220
## 33          BOS   7.284783
## 34          BPT   2.400000
## 35          BQK   3.000000
## 36          BQN   4.250000
## 37          BRO   3.500000
## 38          BRW   2.666667
## 39          BTM   4.000000
## 40          BTR   4.263158
## 41          BTV   3.868421
## 42          BUF   4.425743
## 43          BUR   2.590000
## 44          BWI   5.224932
## 45          BZN   3.714286
## 46          CAE  38.238095
## 47          CAK   4.815789
## 48          CDC   3.000000
## 49          CDV   4.000000
## 50          CEC   1.833333
## 51          CHA   2.800000
## 52          CHO   2.363636
## 53          CHS   5.649123
## 54          CIC   2.500000
## 55          CID   3.888889
## 56          CLD   3.200000
## 57          CLE   5.646884
## 58          CLL   6.764706
## 59          CLT   4.759162
## 60          CMH   4.564885
## 61          CMI   3.818182
## 62          COD   3.000000
## 63          COS   6.250000
## 64          CPR   4.000000
## 65          CRP   3.720000
## 66          CRW   3.240000
## 67          CSG   2.500000
## 68          CVG   8.266491
## 69          DAB   3.400000
## 70          DAL   2.993750
## 71          DAY   4.238806
## 72          DCA   7.492795
## 73          DEN   8.079929
## 74          DFW  15.217496
## 75          DHN   3.700000
## 76          DLG   4.000000
## 77          DLH   5.222222
## 78          DRO   2.000000
## 79          DSM   4.615385
## 80          DTW   9.264706
## 81          EFD   3.500000
## 82          EGE   3.600000
## 83          EKO   3.666667
## 84          ELP   3.366667
## 85          ERI   3.333333
## 86          EUG   3.294118
## 87          EVV   3.941176
## 88          EWR   8.128253
## 89          EYW   2.666667
## 90          FAI   3.851852
## 91          FAR   4.875000
## 92          FAT   3.750000
## 93          FAY 182.750000
## 94          FCA   3.900000
## 95          FLL   4.495968
## 96          FLO   5.400000
## 97          FNT   4.241379
## 98          FSD   3.900000
## 99          FSM   6.000000
## 100         FWA   2.782609
## 101         GEG   4.377778
## 102         GFK   5.600000
## 103         GGG   4.400000
## 104         GJT   2.500000
## 105         GNV 146.700000
## 106         GPT   5.055556
## 107         GRB   3.678571
## 108         GRK   3.750000
## 109         GRR   4.943396
## 110         GSO   3.873239
## 111         GSP   3.365854
## 112         GTF   3.666667
## 113         GTR   2.333333
## 114         HDN   3.000000
## 115         HLN   3.444444
## 116         HNL   5.321429
## 117         HOU   4.117647
## 118         HPN   4.966667
## 119         HRL   3.086957
## 120         HSV   4.833333
## 121         HTS   2.000000
## 122         HVN   3.500000
## 123         IAD   8.997930
## 124         IAH   9.780899
## 125         ICT  36.666667
## 126         IDA   2.714286
## 127         ILE   4.615385
## 128         ILM   3.090909
## 129         IND   7.703448
## 130         IPL   2.200000
## 131         ISP   3.470588
## 132         ITO   4.285714
## 133         IYK   3.250000
## 134         JAC   2.500000
## 135         JAN  87.288462
## 136         JAX   4.947368
## 137         JFK   8.664557
## 138         JNU   3.750000
## 139         KOA   4.074074
## 140         KTN   4.200000
## 141         LAN   3.000000
## 142         LAS   5.778163
## 143         LAW   3.833333
## 144         LAX   7.597418
## 145         LBB   3.166667
## 146         LCH   4.875000
## 147         LEX   3.852941
## 148         LFT   3.818182
## 149         LGA   8.155211
## 150         LGB   5.577778
## 151         LIH   4.920000
## 152         LIT  26.409836
## 153         LNK   4.000000
## 154         LRD   2.857143
## 155         LSE   5.666667
## 156         LWB   8.000000
## 157         LYH   3.400000
## 158         MAF   2.894737
## 159         MBS   3.642857
## 160         MCI   4.315789
## 161         MCN   3.222222
## 162         MCO   6.913158
## 163         MDT   4.325581
## 164         MDW   5.895082
## 165         MEI   3.000000
## 166         MEM   5.732143
## 167         MFE   4.300000
## 168         MFR   3.230769
## 169         MGM   3.200000
## 170         MHT   4.473684
## 171         MIA   8.027523
## 172         MKE   4.938462
## 173         MKK   3.000000
## 174         MLB   6.000000
## 175         MLI   4.307692
## 176         MLU   3.500000
## 177         MOB   3.615385
## 178         MOD   3.000000
## 179         MOT   5.000000
## 180         MQT   8.000000
## 181         MRY   3.357143
## 182         MSN   3.548387
## 183         MSO   4.800000
## 184         MSP   6.565619
## 185         MSY   3.614973
## 186         MTJ   3.000000
## 187         MYR  55.035714
## 188         OAK   4.987903
## 189         OGG   4.500000
## 190         OKC   4.213483
## 191         OMA   3.770270
## 192         OME   3.000000
## 193         ONT   3.915385
## 194         ORD   9.239912
## 195         ORF  20.823529
## 196         OTZ   2.500000
## 197         OXR   2.875000
## 198         PBI  19.020408
## 199         PDX   3.720588
## 200         PFN   3.272727
## 201         PHF   5.555556
## 202         PHL   7.063492
## 203         PHX   5.903169
## 204         PIA   3.437500
## 205         PIE   4.666667
## 206         PIH   2.166667
## 207         PIT   6.303318
## 208         PNS   4.000000
## 209         PSC   4.157895
## 210         PSG   2.666667
## 211         PSP   4.000000
## 212         PVD   3.921569
## 213         PWM   4.800000
## 214         RAP   4.166667
## 215         RDD   3.166667
## 216         RDM   2.416667
## 217         RDU   5.174468
## 218         RIC   5.051724
## 219         RNO   4.441441
## 220         ROA   4.100000
## 221         ROC   5.385965
## 222         RST   4.375000
## 223         RSW   3.573529
## 224         SAN   3.682119
## 225         SAT   3.137931
## 226         SAV   4.705882
## 227         SBA   3.956522
## 228         SBN   2.272727
## 229         SBP   3.000000
## 230         SCE   3.285714
## 231         SDF   4.974026
## 232         SEA   5.883085
## 233         SFO   5.452632
## 234         SGF   3.629630
## 235         SGU   2.750000
## 236         SHV   4.666667
## 237         SIT   5.250000
## 238         SJC   3.928000
## 239         SJT   4.500000
## 240         SJU   5.053763
## 241         SLC   5.988327
## 242         SMF   4.078652
## 243         SMX   2.833333
## 244         SNA   6.193548
## 245         SPS   7.000000
## 246         SRQ   4.166667
## 247         STL   4.422680
## 248         STT   3.333333
## 249         STX   3.000000
## 250         SUN   1.900000
## 251         SWF   4.900000
## 252         SYR   4.745098
## 253         TLH   4.192308
## 254         TOL   3.526316
## 255         TPA   4.430712
## 256         TRI   3.555556
## 257         TUL   3.226667
## 258         TUS   4.054795
## 259         TVC   3.272727
## 260         TWF   4.000000
## 261         TXK   3.000000
## 262         TYR   4.266667
## 263         TYS   3.550000
## 264         VCT   3.500000
## 265         VIS   3.000000
## 266         VLD   2.750000
## 267         VPS   8.666667
## 268         WRG   4.666667
## 269         XNA  34.270833
## 270         YAK   2.500000
## 271         YUM   3.000000