This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
if (!require("pacman")) install.packages("pacman")
## Loading required package: pacman
pacman::p_load(data.table, dplyr,lubridate,ggplot2,stringr)
# Load the preprocessed bike records, discard provider 2, parse both timestamp
# columns, and derive the week number, the calendar date and a combined
# week/provider key from the start timestamp.
df <- fread(
  "data/bikes_preprocessed_with_LOR_and_districts_and_duration_and_distance_and_speed.csv"
) %>%
  filter(providerId != 2) %>%
  mutate(
    timestamp = ymd_hms(timestamp),
    end_timestamp = ymd_hms(end_timestamp),
    # week and day are computed from the already parsed start timestamp
    week = lubridate::week(timestamp),
    day = lubridate::date(timestamp),
    # week number and provider id pasted together without a separator
    week_provider = str_c(week, providerId)
  )
# Number of rows per provider and week, drawn as one coloured line per provider.
groupy.week.provider <- summarise(
  group_by(df, providerId, week),
  trips = n()
)
g <- ggplot(
  data = groupy.week.provider,
  mapping = aes(x = week, y = trips, color = as.factor(providerId))
)
g + geom_line()
## Number of rows per provider and calendar day, one coloured line per provider.
groupy.date.provider <- summarise(
  group_by(df, providerId, day),
  trips = n()
)
g <- ggplot(
  data = groupy.date.provider,
  mapping = aes(x = day, y = trips, color = as.factor(providerId))
)
g + geom_line()
## Median speed per provider and day. Only rows with mode "trip" and a speed
## above 1 enter the median.
groupy.date.provider.speed <- df %>%
  filter(speed > 1, mode == "trip") %>%
  group_by(providerId, day) %>%
  summarise(median_speed = median(speed))
g <- ggplot(
  data = groupy.date.provider.speed,
  mapping = aes(x = day, y = median_speed, color = as.factor(providerId))
)
g + geom_line()
## Median trip duration (minutes) per provider and day. Only rows with mode
## "trip", a speed above 1 and a duration below 50 minutes enter the median.
groupy.date.provider.duration <- df %>%
  filter(speed > 1, mode == "trip", duration_minutes < 50) %>%
  group_by(providerId, day) %>%
  summarise(median_duration = median(duration_minutes))
g <- ggplot(
  data = groupy.date.provider.duration,
  mapping = aes(x = day, y = median_duration, color = as.factor(providerId))
)
g + geom_line()
# Histogram of distance_direct / 1000 (presumably metres converted to km --
# confirm the unit), filled by provider and split into one panel per week.
# Only rows with mode "trip", a speed between 1 and 30 (exclusive) and a
# duration below 60 minutes are shown.
ggplot(
  data = filter(
    df,
    mode == "trip", speed > 1, speed < 30, duration_minutes < 60
  ),
  mapping = aes(x = distance_direct / 1000, fill = as.factor(providerId))
) +
  geom_histogram(bins = 300, alpha = 0.5) +
  facet_wrap(. ~ week)
# Build the link and node tables for a Sankey diagram of trips between areas.
# Results:
#   groupy     - links: zero-based `source` / `target` node ids plus `value`
#   df.indexes - lookup table from node name (`spatial_na`) to zero-based `id`
#   nodes      - one `name` per node, in id order

# Count trips for every (start area, end area) pair.
area.flows <- df %>%
  filter(mode == "trip") %>%
  group_by(from_spatial_na, to_spatial_na) %>%
  summarise(total = n()) %>%
  ungroup()

# Drop trips that start and end in the same area. The comparison has to run on
# the raw area names: once the "start_" / "end_" prefixes are attached, source
# and target can never be equal and a `source != target` filter keeps every
# row. Pairs with a missing area are kept, as they were before.
area.flows <- area.flows %>%
  filter(
    is.na(from_spatial_na) | is.na(to_spatial_na) |
      from_spatial_na != to_spatial_na
  )

# Prefix the names so every area shows up as a separate start node and end
# node, then keep only flows with more than 2000 trips.
sankey.links <- area.flows %>%
  transmute(
    source = paste0("start_", from_spatial_na),
    target = paste0("end_", to_spatial_na),
    value = total
  ) %>%
  filter(value > 2000) %>%
  as.data.frame()

# Node lookup: all distinct sources followed by all distinct targets.
# sankeyNetwork() (networkD3) expects zero-based node indices, hence the `- 1`.
df.indexes <- data.frame(
  spatial_na = unique(c(sankey.links$source, sankey.links$target)),
  stringsAsFactors = FALSE
)
df.indexes$id <- seq_len(nrow(df.indexes)) - 1

# Replace the node names in the links by their zero-based ids.
groupy <- data.frame(
  source = df.indexes$id[match(sankey.links$source, df.indexes$spatial_na)],
  target = df.indexes$id[match(sankey.links$target, df.indexes$spatial_na)],
  value = sankey.links$value
)

# Node table in id order; row i (1-based) describes node id i - 1.
nodes <- data.frame(name = df.indexes$spatial_na, stringsAsFactors = FALSE)
# Now we have 2 data frames: a 'links' data frame with 3 columns (from, to, value), and a 'nodes' data frame that gives the name of each node.
# Thus we can plot it
# p <- sankeyNetwork(Links = groupy, Nodes = nodes, Source = "source",
# Target = "target", Value = "value", NodeID = "name",
# units = "TWh", fontSize = 12, nodeWidth = 10)
#saveNetwork(p,'sankey.html',selfcontained = TRUE)