# 1. Read the data from TravelMode.csv into data frame mytravmode ------------------------------------------
#mytravmode <- read.csv('~/Downloads/RWork/W2/TravelMode.csv', header = TRUE)
# Load TravelMode.csv (path relative to the working directory); the first row
# of the file supplies the column names.
mytravmode <- read.csv(file = "TravelMode.csv", header = TRUE)
# data.class(mytravmode)
# 1. Summarize the original data -------------------------------------------------------------------------
# Keep the per-column summary in origsummary, then display it.
origsummary <- summary(object = mytravmode)
origsummary
## X individual mode choice
## Min. : 1.0 Min. : 1.0 Length:840 Length:840
## 1st Qu.:210.8 1st Qu.: 53.0 Class :character Class :character
## Median :420.5 Median :105.5 Mode :character Mode :character
## Mean :420.5 Mean :105.5
## 3rd Qu.:630.2 3rd Qu.:158.0
## Max. :840.0 Max. :210.0
## wait vcost travel gcost
## Min. : 0.00 Min. : 2.00 Min. : 63.0 Min. : 30.0
## 1st Qu.: 0.75 1st Qu.: 23.00 1st Qu.: 234.0 1st Qu.: 71.0
## Median :35.00 Median : 39.00 Median : 397.0 Median :101.5
## Mean :34.59 Mean : 47.76 Mean : 486.2 Mean :110.9
## 3rd Qu.:53.00 3rd Qu.: 66.25 3rd Qu.: 795.5 3rd Qu.:144.0
## Max. :99.00 Max. :180.00 Max. :1440.0 Max. :269.0
## income size
## Min. : 2.00 Min. :1.000
## 1st Qu.:20.00 1st Qu.:1.000
## Median :34.50 Median :1.000
## Mean :34.55 Mean :1.743
## 3rd Qu.:50.00 3rd Qu.:2.000
## Max. :72.00 Max. :6.000
# 1. Print Mean and Median for the attributes "wait" and "gcost" --------------------------------------------
# Mean of wait and gcost for each travel mode, over every row of the original data.
aggregate(
  cbind(wait, gcost) ~ mode,
  data = mytravmode,
  FUN = mean
)
## mode wait gcost
## 1 air 61.00952 102.64762
## 2 bus 41.65714 115.25714
## 3 car 0.00000 95.41429
## 4 train 35.69048 130.20000
# Median of wait and gcost for each travel mode, over every row of the original data.
aggregate(
  cbind(wait, gcost) ~ mode,
  data = mytravmode,
  FUN = median
)
## mode wait gcost
## 1 air 64 100.0
## 2 bus 35 108.0
## 3 car 0 94.5
## 4 train 34 135.0
# 2. Create new data frame mynewtravmode as a subset of the original dataset ----------------------------------
# mynewtravmode contains only those rows for which choice="yes"---------------------------------------------
# Keep only the rows whose choice is "yes"; subset() also drops rows where the
# comparison is NA. Then preview the first six rows.
mynewtravmode <- subset(x = mytravmode, subset = choice == "yes")
head(mynewtravmode, n = 6L)
## X individual mode choice wait vcost travel gcost income size
## 4 4 1 car yes 0 10 180 30 35 1
## 8 8 2 car yes 0 11 255 50 30 2
## 12 12 3 car yes 0 23 720 101 40 1
## 16 16 4 car yes 0 5 180 32 70 3
## 20 20 5 car yes 0 8 600 99 45 2
## 22 22 6 train yes 40 20 345 57 20 1
# 3. Create new column names for the new data frame, mynewtravmode ---------------------------------------------
# Lookup table: original column name -> descriptive CamelCase replacement.
# Columns not listed here (e.g. "X") keep their names; listed columns that are
# absent from the data frame are simply skipped.
column_renames <- c(
  individual = "IndividualLevel",      # individual level
  mode       = "TravelMode",           # travel mode
  choice     = "TravelChoice",         # travel choice
  wait       = "TerminalWaitingTime",  # terminal waiting time
  vcost      = "VehicleCost",          # vehicle cost
  travel     = "TravelTimeInVehicle",  # travel time in vehicle
  gcost      = "GeneralizedCost",      # generalized cost measure
  income     = "HouseholdIncome",      # household income
  size       = "PartySize"             # party size
)
# Apply every rename in one pass: find each current name in the lookup table
# and overwrite only the names that were found.
current_names <- names(mynewtravmode)
lookup_pos <- match(current_names, names(column_renames))
found <- !is.na(lookup_pos)
current_names[found] <- unname(column_renames[lookup_pos[found]])
names(mynewtravmode) <- current_names
head(mynewtravmode, n = 6L)
## X IndividualLevel TravelMode TravelChoice TerminalWaitingTime VehicleCost
## 4 4 1 car yes 0 10
## 8 8 2 car yes 0 11
## 12 12 3 car yes 0 23
## 16 16 4 car yes 0 5
## 20 20 5 car yes 0 8
## 22 22 6 train yes 40 20
## TravelTimeInVehicle GeneralizedCost HouseholdIncome PartySize
## 4 180 30 35 1
## 8 255 50 30 2
## 12 720 101 40 1
## 16 180 32 70 3
## 20 600 99 45 2
## 22 345 57 20 1
# 4. Use the summary function to create an overview of the new data frame, mynewtravmode ----------------------------
# Per-column summary of the renamed "yes"-only subset, stored and then displayed.
newsummary <- summary(object = mynewtravmode)
newsummary
## X IndividualLevel TravelMode TravelChoice
## Min. : 4.0 Min. : 1.00 Length:210 Length:210
## 1st Qu.:211.0 1st Qu.: 53.25 Class :character Class :character
## Median :421.5 Median :105.50 Mode :character Mode :character
## Mean :420.4 Mean :105.50
## 3rd Qu.:628.0 3rd Qu.:157.75
## Max. :840.0 Max. :210.00
## TerminalWaitingTime VehicleCost TravelTimeInVehicle GeneralizedCost
## Min. : 0.00 Min. : 2.0 Min. : 65.0 Min. : 30.0
## 1st Qu.: 0.00 1st Qu.: 19.0 1st Qu.: 180.0 1st Qu.: 67.0
## Median :20.00 Median : 33.0 Median : 305.0 Median :102.5
## Mean :25.01 Mean : 47.4 Mean : 430.8 Mean :103.8
## 3rd Qu.:40.00 3rd Qu.: 70.0 3rd Qu.: 720.0 3rd Qu.:132.8
## Max. :99.00 Max. :180.0 Max. :1440.0 Max. :238.0
## HouseholdIncome PartySize
## Min. : 2.00 Min. :1.000
## 1st Qu.:20.00 1st Qu.:1.000
## Median :34.50 Median :1.000
## Mean :34.55 Mean :1.743
## 3rd Qu.:50.00 3rd Qu.:2.000
## Max. :72.00 Max. :6.000
# 4. Print the MEAN values for the new data frame, mynewtravmode and compare with the earlier mean ------------------
# Mean waiting time and generalized cost per travel mode, chosen options only.
aggregate(
  cbind(TerminalWaitingTime, GeneralizedCost) ~ TravelMode,
  data = mynewtravmode,
  FUN = mean
)
## TravelMode TerminalWaitingTime GeneralizedCost
## 1 air 46.53448 113.55172
## 2 bus 25.20000 108.13333
## 3 car 0.00000 89.08475
## 4 train 28.52381 106.61905
# Same per-mode means on the full original data, repeated here for comparison.
aggregate(
  cbind(wait, gcost) ~ mode,
  data = mytravmode,
  FUN = mean
)
## mode wait gcost
## 1 air 61.00952 102.64762
## 2 bus 41.65714 115.25714
## 3 car 0.00000 95.41429
## 4 train 35.69048 130.20000
## 4. Compare means.
## This comparison generally shows lower mean Terminal Waiting Times
## and lower mean Generalized Costs in the new data frame (air is the
## exception: its mean Generalized Cost is higher).
## This implies that the travel options that finally did get chosen
## were mostly less expensive and had less idle time at terminals.
# 4. Print the MEDIAN values for the new data frame, mynewtravmode, and compare with the earlier median -------------
# Median waiting time and generalized cost per travel mode, chosen options only.
aggregate(
  cbind(TerminalWaitingTime, GeneralizedCost) ~ TravelMode,
  data = mynewtravmode,
  FUN = median
)
## TravelMode TerminalWaitingTime GeneralizedCost
## 1 air 45.0 109.0
## 2 bus 22.5 116.5
## 3 car 0.0 87.0
## 4 train 25.0 92.0
# Same per-mode medians on the full original data, repeated here for comparison.
aggregate(
  cbind(wait, gcost) ~ mode,
  data = mytravmode,
  FUN = median
)
## mode wait gcost
## 1 air 64 100.0
## 2 bus 35 108.0
## 3 car 0 94.5
## 4 train 34 135.0
## 4. Compare medians.
## This comparison shows lower median Terminal Waiting Times in the new
## data frame (car is 0 in both). Median Generalized Costs are lower for
## car and train but higher for air and bus.
# 5. Rename at least 3 values so that every value in that column is renamed --------------------------------------
library(stringr)
# Replace "yes" with "Accept" and "no" with "Reject" in column "choice" of the
# first data frame. The patterns are anchored with ^ and $ so that only values
# that are exactly "yes" or "no" are recoded: str_replace_all() treats the
# pattern as a regular expression matched anywhere in the string, so an
# unanchored "no" would also rewrite the middle of any longer value that
# happens to contain it.
mytravmode$choice <- str_replace_all(
  mytravmode$choice,
  c("^yes$" = "Accept", "^no$" = "Reject")
)
head(mytravmode)
## X individual mode choice wait vcost travel gcost income size
## 1 1 1 air Reject 69 59 100 70 35 1
## 2 2 1 train Reject 34 31 372 71 35 1
## 3 3 1 bus Reject 35 25 417 70 35 1
## 4 4 1 car Accept 0 10 180 30 35 1
## 5 5 2 air Reject 64 58 68 68 30 2
## 6 6 2 train Reject 44 31 354 84 30 2
# Replace "yes" with "Accept" in column "TravelChoice" of the new data frame
# (the subset with only "yes" choices). The pattern is anchored with ^ and $
# so only values that are exactly "yes" are recoded, never a substring of a
# longer value.
mynewtravmode$TravelChoice <- str_replace_all(
  mynewtravmode$TravelChoice,
  "^yes$",
  "Accept"
)
# Replace "air" with "flight" and "train" with "rail" in column "TravelMode"
# of the new data frame. The anchored patterns match whole values only; values
# that match neither pattern (e.g. "bus", "car") are left unchanged.
mynewtravmode$TravelMode <- str_replace_all(
  mynewtravmode$TravelMode,
  c("^air$" = "flight", "^train$" = "rail")
)
# 6. Display results of the above changes.
head(mynewtravmode)
## X IndividualLevel TravelMode TravelChoice TerminalWaitingTime VehicleCost
## 4 4 1 car Accept 0 10
## 8 8 2 car Accept 0 11
## 12 12 3 car Accept 0 23
## 16 16 4 car Accept 0 5
## 20 20 5 car Accept 0 8
## 22 22 6 rail Accept 40 20
## TravelTimeInVehicle GeneralizedCost HouseholdIncome PartySize
## 4 180 30 35 1
## 8 255 50 30 2
## 12 720 101 40 1
## 16 180 32 70 3
## 20 600 99 45 2
## 22 345 57 20 1
# Show the last six rows of the recoded subset as well.
tail(mynewtravmode, n = 6L)
## X IndividualLevel TravelMode TravelChoice TerminalWaitingTime VehicleCost
## 819 819 205 bus Accept 30 35
## 824 824 206 car Accept 0 30
## 825 825 207 flight Accept 45 126
## 831 831 208 bus Accept 50 29
## 836 836 209 car Accept 0 27
## 840 840 210 car Accept 0 12
## TravelTimeInVehicle GeneralizedCost HouseholdIncome PartySize
## 819 775 119 45 1
## 824 720 108 40 1
## 825 135 141 40 1
## 831 265 57 2 1
## 836 510 82 20 1
## 840 540 94 70 4