# 1. Read the data from TravelMode.csv into data frame mytravmode -----------------------------------------

# Load TravelMode.csv from the current working directory; header = TRUE makes
# the first line of the file supply the column names.
# Alternative absolute location, kept for reference:
#mytravmode <- read.csv('~/Downloads/RWork/W2/TravelMode.csv', header = TRUE)
mytravmode <- read.csv(file = "TravelMode.csv", header = TRUE)
#data.class(mytravmode)
# 1. Summarize the original data -------------------------------------------------------------------------
# Keep the per-column summary in origsummary, then auto-print it.
origsummary <- summary(object = mytravmode)
origsummary
##        X           individual        mode              choice         
##  Min.   :  1.0   Min.   :  1.0   Length:840         Length:840        
##  1st Qu.:210.8   1st Qu.: 53.0   Class :character   Class :character  
##  Median :420.5   Median :105.5   Mode  :character   Mode  :character  
##  Mean   :420.5   Mean   :105.5                                        
##  3rd Qu.:630.2   3rd Qu.:158.0                                        
##  Max.   :840.0   Max.   :210.0                                        
##       wait           vcost            travel           gcost      
##  Min.   : 0.00   Min.   :  2.00   Min.   :  63.0   Min.   : 30.0  
##  1st Qu.: 0.75   1st Qu.: 23.00   1st Qu.: 234.0   1st Qu.: 71.0  
##  Median :35.00   Median : 39.00   Median : 397.0   Median :101.5  
##  Mean   :34.59   Mean   : 47.76   Mean   : 486.2   Mean   :110.9  
##  3rd Qu.:53.00   3rd Qu.: 66.25   3rd Qu.: 795.5   3rd Qu.:144.0  
##  Max.   :99.00   Max.   :180.00   Max.   :1440.0   Max.   :269.0  
##      income           size      
##  Min.   : 2.00   Min.   :1.000  
##  1st Qu.:20.00   1st Qu.:1.000  
##  Median :34.50   Median :1.000  
##  Mean   :34.55   Mean   :1.743  
##  3rd Qu.:50.00   3rd Qu.:2.000  
##  Max.   :72.00   Max.   :6.000
# 1. Print Mean and Median for the attributes "wait" and "gcost" --------------------------------------------
# Mean of wait and gcost within each travel mode, over all rows of mytravmode.
aggregate(
  cbind(wait, gcost) ~ mode,
  data = mytravmode,
  FUN = mean
)
##    mode     wait     gcost
## 1   air 61.00952 102.64762
## 2   bus 41.65714 115.25714
## 3   car  0.00000  95.41429
## 4 train 35.69048 130.20000
# Median of wait and gcost within each travel mode, over all rows of mytravmode.
aggregate(
  cbind(wait, gcost) ~ mode,
  data = mytravmode,
  FUN = median
)
##    mode wait gcost
## 1   air   64 100.0
## 2   bus   35 108.0
## 3   car    0  94.5
## 4 train   34 135.0
# 2. Build mynewtravmode from the rows of mytravmode whose choice is "yes" ------------------------------------
#    which() turns the comparison into row positions, so a row where the
#    comparison is NA is left out, exactly as subset() would leave it out.
#    The original row names are carried over unchanged.
mynewtravmode <- mytravmode[which(mytravmode[["choice"]] == "yes"), , drop = FALSE]
head(mynewtravmode)
##     X individual  mode choice wait vcost travel gcost income size
## 4   4          1   car    yes    0    10    180    30     35    1
## 8   8          2   car    yes    0    11    255    50     30    2
## 12 12          3   car    yes    0    23    720   101     40    1
## 16 16          4   car    yes    0     5    180    32     70    3
## 20 20          5   car    yes    0     8    600    99     45    2
## 22 22          6 train    yes   40    20    345    57     20    1
# 3. Create new column names for the new data frame, mynewtravmode ---------------------------------------------
# Every current column name is passed through one lookup: a name listed in the
# switch() is replaced by its descriptive CamelCase form, and any name that is
# not listed (for example X) hits the unnamed default and is kept as it is.
names(mynewtravmode) <- vapply(
  names(mynewtravmode),
  function(old_name) {
    switch(
      old_name,
      individual = "IndividualLevel",      # individual level
      mode       = "TravelMode",           # travel mode
      choice     = "TravelChoice",         # travel choice
      wait       = "TerminalWaitingTime",  # terminal waiting time
      vcost      = "VehicleCost",          # vehicle cost
      travel     = "TravelTimeInVehicle",  # travel time in vehicle
      gcost      = "GeneralizedCost",      # generalized cost measure
      income     = "HouseholdIncome",      # household income
      size       = "PartySize",            # party size
      old_name
    )
  },
  character(1),
  USE.NAMES = FALSE
)

head(mynewtravmode)
##     X IndividualLevel TravelMode TravelChoice TerminalWaitingTime VehicleCost
## 4   4               1        car          yes                   0          10
## 8   8               2        car          yes                   0          11
## 12 12               3        car          yes                   0          23
## 16 16               4        car          yes                   0           5
## 20 20               5        car          yes                   0           8
## 22 22               6      train          yes                  40          20
##    TravelTimeInVehicle GeneralizedCost HouseholdIncome PartySize
## 4                  180              30              35         1
## 8                  255              50              30         2
## 12                 720             101              40         1
## 16                 180              32              70         3
## 20                 600              99              45         2
## 22                 345              57              20         1
# 4. Use the summary function to create an overview of the new data frame, mynewtravmode ----------------------------
# Keep the per-column summary of the "yes"-only rows in newsummary, then
# auto-print it.
newsummary <- summary(object = mynewtravmode)
newsummary
##        X         IndividualLevel   TravelMode        TravelChoice      
##  Min.   :  4.0   Min.   :  1.00   Length:210         Length:210        
##  1st Qu.:211.0   1st Qu.: 53.25   Class :character   Class :character  
##  Median :421.5   Median :105.50   Mode  :character   Mode  :character  
##  Mean   :420.4   Mean   :105.50                                        
##  3rd Qu.:628.0   3rd Qu.:157.75                                        
##  Max.   :840.0   Max.   :210.00                                        
##  TerminalWaitingTime  VehicleCost    TravelTimeInVehicle GeneralizedCost
##  Min.   : 0.00       Min.   :  2.0   Min.   :  65.0      Min.   : 30.0  
##  1st Qu.: 0.00       1st Qu.: 19.0   1st Qu.: 180.0      1st Qu.: 67.0  
##  Median :20.00       Median : 33.0   Median : 305.0      Median :102.5  
##  Mean   :25.01       Mean   : 47.4   Mean   : 430.8      Mean   :103.8  
##  3rd Qu.:40.00       3rd Qu.: 70.0   3rd Qu.: 720.0      3rd Qu.:132.8  
##  Max.   :99.00       Max.   :180.0   Max.   :1440.0      Max.   :238.0  
##  HouseholdIncome   PartySize    
##  Min.   : 2.00   Min.   :1.000  
##  1st Qu.:20.00   1st Qu.:1.000  
##  Median :34.50   Median :1.000  
##  Mean   :34.55   Mean   :1.743  
##  3rd Qu.:50.00   3rd Qu.:2.000  
##  Max.   :72.00   Max.   :6.000
# 4. Print the MEAN values for the new data frame, mynewtravmode and compare with the earlier mean ------------------
# Mean waiting time and generalized cost per travel mode, chosen rows only.
aggregate(
  cbind(TerminalWaitingTime, GeneralizedCost) ~ TravelMode,
  data = mynewtravmode,
  FUN = mean
)
##   TravelMode TerminalWaitingTime GeneralizedCost
## 1        air            46.53448       113.55172
## 2        bus            25.20000       108.13333
## 3        car             0.00000        89.08475
## 4      train            28.52381       106.61905
# Same per-mode means over the full original data, repeated here for comparison.
aggregate(
  cbind(wait, gcost) ~ mode,
  data = mytravmode,
  FUN = mean
)
##    mode     wait     gcost
## 1   air 61.00952 102.64762
## 2   bus 41.65714 115.25714
## 3   car  0.00000  95.41429
## 4 train 35.69048 130.20000
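## 4. Compare means.
## This comparison generally shows lower mean Terminal Waiting Times 
## and lower mean Generalized Costs in the new data frame. The exceptions 
## are car, whose waiting time is 0 in both, and air, whose mean 
## Generalized Cost is higher in the new data frame (113.55 vs 102.65).
## This implies that the travel options that finally did get chosen 
## were mostly less expensive and had less idle time at terminals.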



# 4. Print the MEDIAN values for the new data frame, mynewtravmode, and compare with the earlier median -------------
# Median waiting time and generalized cost per travel mode, chosen rows only.
aggregate(
  cbind(TerminalWaitingTime, GeneralizedCost) ~ TravelMode,
  data = mynewtravmode,
  FUN = median
)
##   TravelMode TerminalWaitingTime GeneralizedCost
## 1        air                45.0           109.0
## 2        bus                22.5           116.5
## 3        car                 0.0            87.0
## 4      train                25.0            92.0
# Same per-mode medians over the full original data, repeated here for comparison.
aggregate(
  cbind(wait, gcost) ~ mode,
  data = mytravmode,
  FUN = median
)
##    mode wait gcost
## 1   air   64 100.0
## 2   bus   35 108.0
## 3   car    0  94.5
## 4 train   34 135.0
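## 4. Compare medians.
## This comparison shows lower median Terminal Waiting Times in the new 
## data frame for air, bus and train (car is 0 in both). 
## Median Generalized Costs are mixed: lower for car and train, 
## but higher for air (109 vs 100) and bus (116.5 vs 108).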
# 5. Rename at least 3 values so that every value in that column is renamed --------------------------------------
library(stringr)

# Recode every value of column "choice" in the first data frame:
# "yes" becomes "Accept" and "no" becomes "Reject".
# The patterns are regular expressions anchored with ^ and $, so only a cell
# that is exactly "yes" or exactly "no" is rewritten; an unanchored "no" would
# also rewrite those two letters inside any longer value. Passing a named
# vector (pattern = replacement) to str_replace_all() applies both
# replacements in one call. NA cells stay NA.
mytravmode$choice <- str_replace_all(
  mytravmode$choice,
  c("^yes$" = "Accept", "^no$" = "Reject")
)

head(mytravmode)
##   X individual  mode choice wait vcost travel gcost income size
## 1 1          1   air Reject   69    59    100    70     35    1
## 2 2          1 train Reject   34    31    372    71     35    1
## 3 3          1   bus Reject   35    25    417    70     35    1
## 4 4          1   car Accept    0    10    180    30     35    1
## 5 5          2   air Reject   64    58     68    68     30    2
## 6 6          2 train Reject   44    31    354    84     30    2
# Recode column "TravelChoice" in the new data frame (the subset that holds
# only "yes" choices): "yes" becomes "Accept". The pattern is anchored so only
# a cell that is exactly "yes" is rewritten.
mynewtravmode$TravelChoice <- str_replace_all(
  mynewtravmode$TravelChoice,
  "^yes$",
  "Accept"
)

# Recode column "TravelMode" in the new data frame: "air" becomes "flight" and
# "train" becomes "rail". Anchored patterns in a named vector
# (pattern = replacement) rewrite whole cell values only, in a single call.
mynewtravmode$TravelMode <- str_replace_all(
  mynewtravmode$TravelMode,
  c("^air$" = "flight", "^train$" = "rail")
)
# NOTE(review): "bus" and "car" keep their original labels. If step 5 requires
# every value in the column to be renamed, add entries for them above.

# 6. Display results of the above changes.
head(mynewtravmode)
##     X IndividualLevel TravelMode TravelChoice TerminalWaitingTime VehicleCost
## 4   4               1        car       Accept                   0          10
## 8   8               2        car       Accept                   0          11
## 12 12               3        car       Accept                   0          23
## 16 16               4        car       Accept                   0           5
## 20 20               5        car       Accept                   0           8
## 22 22               6       rail       Accept                  40          20
##    TravelTimeInVehicle GeneralizedCost HouseholdIncome PartySize
## 4                  180              30              35         1
## 8                  255              50              30         2
## 12                 720             101              40         1
## 16                 180              32              70         3
## 20                 600              99              45         2
## 22                 345              57              20         1
# Last six rows of the recoded subset (6 is tail()'s default row count).
tail(mynewtravmode, n = 6L)
##       X IndividualLevel TravelMode TravelChoice TerminalWaitingTime VehicleCost
## 819 819             205        bus       Accept                  30          35
## 824 824             206        car       Accept                   0          30
## 825 825             207     flight       Accept                  45         126
## 831 831             208        bus       Accept                  50          29
## 836 836             209        car       Accept                   0          27
## 840 840             210        car       Accept                   0          12
##     TravelTimeInVehicle GeneralizedCost HouseholdIncome PartySize
## 819                 775             119              45         1
## 824                 720             108              40         1
## 825                 135             141              40         1
## 831                 265              57               2         1
## 836                 510              82              20         1
## 840                 540              94              70         4