# Session2-ex1
# Reading From Multiple Sources
#theData<-NULL
#for (i in 1:100) {
# filename <- paste0(i,".txt")
# info = file.info(filename)
###############check if it is an empty file before running codes
# if(info$size != 0){
# myData <- read.csv(filename, header = FALSE)
# theData <- rbind(theData, myData)
# }
# }
#colnames(theData) <- c("ID", "Date", "Lon", "Lat")
#theData <- as.data.frame(theData)
#theData$Date <- as.POSIXct(theData$Date)
# Data Mining Stops
#trips <-NULL
#thisRow =theData[1,]
######create a new file 'trips' to aggregate useless lines
#for (i in 2:length(theData$Lat2)) {
# if(thisRow$Lat2 != theData$Lat2[i] | thisRow$Lon2 != theData$Lon2[i] | thisRow$ID != theData$ID[i]){
# waitingTime <- as.integer(difftime(theData$Date[i-1], thisRow$Date, units = "mins"))
# trips <- rbind(trips, cbind(theData[i-1,], waitingTime))
# thisRow =theData[i,]
# }
#}
#write.csv(trips, "trips100.csv", row.names = FALSE)
# Frequency of Long Stops
# Load the stop records. Presumably this is the file written by the
# commented-out stop-mining step, where waitingTime is computed in minutes
# - TODO confirm.
trips <- read.csv(file = "./data/trips100.csv")

# Preview the records whose waiting time is greater than zero.
head(trips[trips[["waitingTime"]] > 0, ])
## ID Date Lon Lat Lon2 Lat2 waitingTime
## 16 1 2008-02-03 10:30:29 116.6929 39.85167 116.69 39.85 769
## 30 1 2008-02-04 10:05:10 116.6916 39.85171 116.69 39.85 889
## 35 1 2008-02-04 11:05:09 116.5094 39.90679 116.51 39.91 10
## 39 1 2008-02-05 09:38:34 116.6916 39.85156 116.69 39.85 749
## 49 1 2008-02-05 11:18:34 116.4609 39.91620 116.46 39.92 5
## 74 1 2008-02-05 18:51:20 116.5034 39.90693 116.50 39.91 10
dim(trips)
## [1] 60154 7

# A "long stop" is a record whose waiting time is at least 4 * 60.
longStops <- trips[trips[["waitingTime"]] >= 4 * 60, ]

# Count the long stops for every distinct (Lat2, Lon2) pair; the count lands
# in the third column, which is renamed to "num".
longStopsFreq <- aggregate(
  waitingTime ~ Lat2 + Lon2,
  data = longStops,
  FUN = length
)
names(longStopsFreq)[3] <- "num"

# Keep the locations with more than five long stops, most frequent first.
longStopsFreq <- longStopsFreq[longStopsFreq[["num"]] > 5, ]
longStopsFreq <- longStopsFreq[
  order(longStopsFreq[["num"]], decreasing = TRUE),
]
head(longStopsFreq)
## Lat2 Lon2 num
## 98 40.20 116.72 24
## 95 39.72 116.71 14
## 80 40.11 116.66 12
## 71 40.13 116.64 10
## 97 40.13 116.72 10
## 102 40.15 116.73 9
# Leaflet Package
# For more information, see https://rstudio.github.io/leaflet/
# Turn every frequent long-stop location into a rectangle: the corners sit
# 0.005 below and above the location's Lon2 / Lat2 values.
longStopsFreq1 <- with(
  longStopsFreq,
  data.frame(
    lng1 = Lon2 - 0.005,
    lat1 = Lat2 - 0.005,
    lng2 = Lon2 + 0.005,
    lat2 = Lat2 + 0.005
  )
)
dim(longStopsFreq1)
## [1] 21 4
library(leaflet)

# Draw the rectangles on the default tile layer; each rectangle carries its
# long-stop count both as a popup and as a label.
m <- leaflet() %>%
  addTiles() %>%
  addRectangles(
    lng1 = longStopsFreq1$lng1,
    lat1 = longStopsFreq1$lat1,
    lng2 = longStopsFreq1$lng2,
    lat2 = longStopsFreq1$lat2,
    popup = paste0("Number of records: ", longStopsFreq$num),
    label = paste0("Number of records: ", longStopsFreq$num)
  )
m
# popup: shown when a rectangle is clicked
# label: shown when the pointer hovers over a rectangle
# ggplot2 group feature
library(lubridate)
library(ggplot2)

# Weekday labels indexed by day of month: days[2] is "Sat" ... days[8] is
# "Fri", and index 1 is a blank placeholder. This lines up with February 2008
# (the month seen in the sample output of `trips`), where the 2nd was a
# Saturday.
# NOTE(review): a day of month above 8 indexes past the end and yields NA -
# confirm the data only covers 2-8 February.
days <- c("", "Sat", "Sun", "Mon", "Tue", "Wed", "Thu", "Fri")

# Count the records with waitingTime below 60 for every
# (day of month, hour of day) combination.
movingFreq <- aggregate(
  ID ~ day(Date) + hour(Date),
  data = trips[trips$waitingTime < 60, ],
  FUN = length
)
colnames(movingFreq) <- c("day", "hour", "num")

# One line (plus points) per day of month, coloured by its weekday label.
j <- ggplot(movingFreq, aes(x = hour)) +
  geom_line(aes(y = num, group = day, colour = days[day])) +
  geom_point(aes(y = num, group = day, colour = days[day])) +
  labs(
    title = "Comparision of Frequency Working T Drive Taxis",
    x = "Hour", y = "Frequency", col = "Day"
  ) +
  theme(panel.background = element_rect(fill = "white")) +
  scale_color_brewer(palette = "Paired")
print(j)

# Grid feature
library(gridExtra)
library(grid)

# Build the hourly frequency chart for a subset of movingFreq: one line plus
# points per day of month, coloured by the weekday label looked up in `days`.
# Shared by both panels below so they cannot drift apart.
plot_hourly_freq <- function(freq) {
  ggplot(freq, aes(x = hour)) +
    geom_line(aes(y = num, group = day, colour = days[day])) +
    geom_point(aes(y = num, group = day, colour = days[day])) +
    labs(x = "Hour", y = "Frequency", col = "Day") +
    theme(panel.background = element_rect(fill = "white"))
}

# Days of month below 4 map to the "Sat" / "Sun" labels in `days`.
weekendsFreq <- movingFreq[movingFreq$day < 4, ]
k1 <- plot_hourly_freq(weekendsFreq)

# Days of month from 4 upwards map to the "Mon" ... "Fri" labels.
weekdaysFreq <- movingFreq[movingFreq$day >= 4, ]
k2 <- plot_hourly_freq(weekdaysFreq)

# Place both panels side by side under a shared title, with a small italic
# caption aligned to the bottom-right corner.
grid.arrange(
  k1,
  k2,
  nrow = 1,
  top = "Comparision of Frequency Working T Drive Taxis",
  bottom = textGrob(
    "Fig 3",
    gp = gpar(fontface = 3, fontsize = 9),
    hjust = 1,
    x = 1
  )
)

# facet_wrap feature
# Keep only the records that belong to the nine selected taxi IDs.
nineIDs <- trips[trips$ID %in% c(1, 2, 4, 5, 7, 11, 13, 20, 21), ]

# Base plot: connect the records as a path, in the order they appear in the
# data.
# NOTE(review): latitude is on the x axis and longitude on the y axis, so the
# tracks are drawn transposed relative to a conventional map - confirm this
# is intended.
p1 <- ggplot(data = nineIDs, mapping = aes(x = Lat)) +
  geom_path(mapping = aes(y = Lon))

# One panel per taxi ID, laid out in three rows, without a legend.
p4 <- p1 +
  facet_wrap(facets = ~ID, nrow = 3) +
  theme(legend.position = "none") +
  ggtitle(label = "Visited Places by 9 Taxi Drivers")
p4