Loading all the CSV data from the working directory
# Read every CSV file in the working directory into its own data frame in the
# global environment. Each object is named after its file, with the ".csv"
# extension removed and the rest passed through make.names() so that it is a
# syntactically valid R name.
# `pattern` and the sub() pattern are regular expressions, not globs: the dot
# is escaped and the match is anchored at the end of the name so that only a
# real ".csv" extension is matched.
data <- list.files(pattern = "\\.csv$")
list2env(
  lapply(
    setNames(data, make.names(sub("\\.csv$", "", data))),
    read.csv
  ),
  envir = .GlobalEnv
)
## <environment: R_GlobalEnv>
Collecting all loaded data frames (objects named "X" followed by digits) into a list
# Gather every object in the global environment whose name starts with "X"
# followed by at least one digit into a named list. The environment is given
# explicitly because the CSV data frames are loaded into .GlobalEnv, so the
# lookup no longer depends on where this line happens to be evaluated.
alldata <- mget(
  ls(envir = .GlobalEnv, pattern = "^X\\d+"),
  envir = .GlobalEnv
)
Merge all individual data from 2012
# Fold the list of data frames into a single data frame, combining them
# pairwise from left to right with smartbind().
bike2012 <- Reduce(f = smartbind, x = alldata)
Removing unnecessary columns
# Keep only the rental, bike, date and station identifier columns.
bike2012 <- select(
  bike2012,
  Rental.Id, Bike.Id,
  End.Date, EndStation.Id,
  Start.Date, StartStation.Id
)
Eliminate missing values
# Count the missing values in each column. vapply() is used so the result is
# always a named integer vector with one entry per column, whatever the input.
vapply(bike2012, function(column) sum(is.na(column)), integer(1))
## Rental.Id Bike.Id End.Date EndStation.Id
## 0 14 0 0
## Start.Date StartStation.Id
## 0 0
# Drop every row that contains at least one missing value.
bike2012 <- na.omit(bike2012)
# Preview the first five rows. head() stops at the end of the data, so a data
# frame with fewer than five rows is shown as-is instead of being padded with
# NA rows.
head(bike2012, n = 5L)
## Rental.Id Bike.Id End.Date EndStation.Id Start.Date
## 1:1 9340768 893 04/01/2012 00:20 169 04/01/2012 00:00
## 1:2 9345966 5621 04/01/2012 00:05 319 04/01/2012 00:01
## 1:3 9349921 4365 04/01/2012 00:12 343 04/01/2012 00:01
## 1:4 9341757 2708 04/01/2012 00:08 379 04/01/2012 00:04
## 1:5 9344212 17 04/01/2012 00:20 340 04/01/2012 00:05
## StartStation.Id
## 1:1 224
## 1:2 3
## 1:3 311
## 1:4 225
## 1:5 88
Saving dataset
#write.table(bike2012,"bike2012.csv", sep = ",")
# Summarise trips between stations as an origin-destination (OD) table.
#
# Cross-tabulates data$StartStation.Id against data$EndStation.Id and prints
# two previews:
#   1. the top-left corner of the OD matrix (at most 6 rows and 10 columns);
#   2. the (at most 6) most frequent start/end station pairs.
#
# Arguments:
#   data  An object with StartStation.Id and EndStation.Id components,
#         typically a data frame with one row per trip.
#
# Returns (invisibly) the second preview: a data frame with the columns
# StartStation.Id, EndStation.Id and Freq, sorted by decreasing Freq.
od <- function(data) {
  # Tabulate once and reuse the result for both views.
  trip_counts <- table(data$StartStation.Id, data$EndStation.Id)
  odmatrix <- as.data.frame.matrix(trip_counts)

  frq <- as.data.frame(trip_counts)
  colnames(frq) <- c("StartStation.Id", "EndStation.Id", "Freq")
  frq <- frq[order(frq$Freq, decreasing = TRUE), ]

  # Clamp the preview window to the actual dimensions so that small inputs
  # neither fail on missing columns nor get padded with NA rows.
  shown_rows <- seq_len(min(6L, nrow(odmatrix)))
  shown_cols <- seq_len(min(10L, ncol(odmatrix)))
  print(odmatrix[shown_rows, shown_cols, drop = FALSE])
  print(head(frq))
}
# Print the origin-destination previews for the merged 2012 data.
od(data = bike2012)
## 0 1 2 3 4 5 6 7 8 9
## 1 116 319 0 36 60 4 10 1 2 5
## 2 142 1 400 0 2 15 10 1 1 12
## 3 147 12 0 100 34 2 5 1 0 45
## 4 245 12 0 100 246 1 31 16 0 86
## 5 202 0 10 0 5 437 11 1 5 9
## 6 226 4 3 1 68 3 253 70 7 5
## StartStation.Id EndStation.Id Freq
## 189758 191 191 16160
## 305219 307 307 10962
## 301203 303 303 8729
## 402609 406 406 5503
## 403613 407 407 5479
## 400601 404 404 4608