July 10, 2016
Flowchart, Network Graph, Dependency graph, UML Diagrams, ER Model…
Everything happens in R
Literate Programming / Reproducible Research
Reporting
DiagrammeR, diagram, Rgraphviz (Bioconductor), networkD3… (htmlwidgets dependency), especially for the DOT Language
Directed Graph
Undirected Graph
# DiagrammeR
# Node data frame: one row per node, identified by the `nodes` column.
NDF <- DiagrammeR::create_nodes(
  nodes = c("a", "b", "c"), # required
  type = "lower",
  color = "aqua",
  shape = c("circle", "circle", "rectangle"),
  Value = c(3.5, 2.6, 9.4)
)
# Edge data frame: one row per edge. `data` carries one value per edge
# (two edges here, matching the printed output below).
EDF <- DiagrammeR::create_edges(
  from = c("a", "a"),
  to = c("b", "c"),
  color = "green",
  data = c(2.7, 8.9)
)
head(NDF)
## nodes type label color shape Value ## 1 a lower a aqua circle 3.5 ## 2 b lower b aqua circle 2.6 ## 3 c lower c aqua rectangle 9.4
head(EDF)
## from to rel color data ## 1 a b green 2.7 ## 2 a c green 8.9
# Combine both data frames into a graph object and inspect its top level.
GRAPH <- DiagrammeR::create_graph(NDF, EDF)
str(GRAPH, max.level = 1)
## List of 10
## $ graph_name : NULL
## $ graph_time : NULL
## $ graph_tz : NULL
## $ nodes_df :'data.frame': 3 obs. of 6 variables:
## $ edges_df :'data.frame': 2 obs. of 5 variables:
## $ graph_attrs: NULL
## $ node_attrs : NULL
## $ edge_attrs : NULL
## $ directed : logi TRUE
## $ dot_code : chr "digraph {\n\n 'a' [label = 'a', color = 'aqua', shape = 'circle'] \n 'b' [label = 'b', color = 'aqua', shape = 'circle'] \n "| __truncated__
## - attr(*, "class")= chr "dgr_graph"
node_info, edge_info, node_count…
select_nodes, select_nodes_by_id, select_nodes_in_neighborhood…
trav_out, trav_in, get_path…
See igraph package…
More efficient Data wrangling packages dplyr, tidyr…
Color, Shape, Font, Fontsize…
Graphviz (grViz()) Layout: dot, neato, twopi, circo (See example #3)
mermaid.js (mermaid()) Layout: Gantt Chart

| Filename | Defines |
|---|---|
| agency.txt | One or more transit agencies that provide the data in this feed. Ex: DB, BVG |
| stops.txt | Individual locations where vehicles pick up or drop off passengers. Includes Latitude + Longitudes Ex: Alexanderplatz. |
| routes.txt | Transit routes. A route is a group of trips that are displayed to riders as a single service. Ex: U2 Line |
| trips.txt | Trips for each route. A trip is a sequence of two or more stops that occurs at specific time. Ex: The U2 U-Bahn leaving today from klosterstr. Station at 13:01 |
| stop_times.txt | Times that a vehicle arrives at and departs from individual stops for each trip. |
| calendar.txt | Dates for service IDs using a weekly schedule. Specify when service starts and ends, as well as days of the week where service is available. |
# Render the Graphviz DOT file describing the GTFS tables.
DiagrammeR::grViz(diagram = "RDBM.gv")
https://daten.berlin.de/datensaetze/vbb-fahrplandaten-ende-juni-bis-dezember-2016
# Download the GTFS archive to a temporary file; `mode = "wb"` forces a
# binary transfer so the zip is not corrupted on Windows.
temp <- tempfile()
download.file("http://www.vbb.de/de/download/GTFS_VBB_EndeJun_Dez2016.zip",
              temp, mode = "wb")

# Read one comma-separated member of a GTFS zip archive into a data frame.
#
# Args:
#   zip_path: path to the zip archive.
#   member:   name of the file inside the archive, e.g. "stops.txt".
# Returns: a data.frame with character columns kept as character.
#
# `fileEncoding` is ignored by read.table() when it is handed a connection,
# so the strings are marked as UTF-8 via `encoding` instead. Only the double
# quote is treated as a quote character and `#` is not a comment marker, so
# names containing apostrophes or hashes do not break parsing.
read_gtfs_table <- function(zip_path, member) {
  read.table(unz(zip_path, filename = member),
             header = TRUE, sep = ",", quote = "\"", comment.char = "",
             encoding = "UTF-8", stringsAsFactors = FALSE)
}

stops <- read_gtfs_table(temp, "stops.txt")
stop_times <- read_gtfs_table(temp, "stop_times.txt")
trips <- read_gtfs_table(temp, "trips.txt")
routes <- read_gtfs_table(temp, "routes.txt")
unlink(temp)
library(dplyr)

# U-Bahn lines to include, with the colour used to draw each line's edges.
ubahn_colors <- c(U1 = "blue", U2 = "red", U3 = "green", U4 = "lime",
                  U5 = "gold", U6 = "purple", U7 = "purple", U8 = "purple")

# Shared base for edges and nodes: for every (headsign, line) pair keep the
# first trip and expand it to its rows in stop_times.
ubahn_stop_times <- routes %>%
  filter(route_short_name %in% names(ubahn_colors)) %>%
  select(route_id, route_short_name) %>%
  left_join(trips, by = "route_id") %>%
  group_by(trip_headsign, route_short_name) %>%
  summarise(trip_id = first(trip_id)) %>%
  ungroup() %>%
  left_join(stop_times, by = "trip_id")

# Every stop_times row re-keyed to the previous sequence number, so joining
# on (trip_id, stop_sequence) attaches the *next* stop of the same trip.
next_stops <- transmute(stop_times,
                        trip_id = trip_id,
                        stop_id_to = stop_id,
                        stop_sequence = stop_sequence - 1)

# Edge data frame: one row per distinct (from, to, line) hop.
edf_ubahn <- ubahn_stop_times %>%
  left_join(next_stops, by = c("trip_id", "stop_sequence")) %>%
  transmute(from = stop_id, to = stop_id_to, type = route_short_name) %>%
  filter(!is.na(to)) %>% # the last stop of a trip has no successor
  unique() %>%
  mutate(color = unname(ubahn_colors[as.character(type)])) %>%
  as.data.frame()

# Node data frame: one row per distinct stop on the selected trips, labelled
# with the stop name and carrying its coordinates.
ndf_ubahn <- ubahn_stop_times %>%
  transmute(nodes = stop_id, stop_id = stop_id) %>%
  unique() %>%
  left_join(stops, by = "stop_id") %>%
  transmute(nodes, label = stop_name, stop_lat, stop_lon) %>%
  as.data.frame()
head(edf_ubahn)
## from to type color ## 1 9175007 9175006 U5 gold ## 2 9175006 9175005 U5 gold ## 3 9175005 9175004 U5 gold ## 4 9175004 9175001 U5 gold ## 5 9175001 9175004 U5 gold ## 6 9175004 9175005 U5 gold
head(ndf_ubahn)
## nodes label stop_lat stop_lon ## 1 9175007 U Hellersdorf (Berlin) 52.53595 13.60579 ## 2 9175006 U Cottbusser Platz (Berlin) 52.53396 13.59689 ## 3 9175005 U Neue Grottkauer Str. (Berlin) 52.52824 13.59078 ## 4 9175004 U Kaulsdorf-Nord (Berlin) 52.52144 13.58876 ## 5 9175001 S+U Wuhletal (Berlin) 52.51254 13.57548 ## 6 9130002 S+U Pankow (Berlin) 52.56728 13.41228
# Build the U-Bahn graph from the node and edge data frames, then render it
# as an interactive visNetwork widget.
graph_ubahn <- DiagrammeR::create_graph(ndf_ubahn, edf_ubahn)
DiagrammeR::render_graph(graph_ubahn, output = "visNetwork")
# Function: get the nodes reachable within a given number of steps
#
# Args:
#   edf:         edge data frame with columns `from` and `to`.
#   ndf:         node data frame; kept in the signature for callers but not
#                used by the computation.
#   nodes:       vector of starting node ids.
#   number_step: single non-negative whole number of expansion rounds.
# Returns: the starting nodes plus every node reached by following edges
#   from -> to at most `number_step` times. Each round puts the targets it
#   finds in front of the nodes already known.
recursive_dep <- function(edf, ndf, nodes, number_step) {
  # A negative or fractional step count would never reach the stopping
  # condition, so reject it up front with a clear message.
  valid_steps <- is.numeric(number_step) && length(number_step) == 1 &&
    is.finite(number_step) && number_step >= 0 &&
    number_step == round(number_step)
  if (!valid_steps) {
    stop("number_step must be a single non-negative whole number",
         call. = FALSE)
  }
  for (step in seq_len(number_step)) {
    nodes <- union(edf$to[edf$from %in% nodes], nodes)
  }
  nodes
}
# Nodes reachable from the stop(s) whose label contains "Nollendorf"
# within 3 steps
start_nodes <- ndf_ubahn$nodes[grepl('Nollendorf', ndf_ubahn$label)]
nodes_sub <- recursive_dep(edf = edf_ubahn, ndf = ndf_ubahn,
                           nodes = start_nodes,
                           number_step = 3)
# Node and edge data frames restricted to that neighbourhood. Edges are kept
# only when both endpoints are in the node set, so no edge points at a node
# that is missing from ndf_sub.
ndf_sub <- data.frame(nodes = nodes_sub) %>%
  dplyr::left_join(ndf_ubahn, by = "nodes")
edf_sub <- dplyr::filter(edf_ubahn, from %in% nodes_sub, to %in% nodes_sub)
graph_sub <- DiagrammeR::create_graph(ndf_sub, edf_sub)
DiagrammeR::render_graph(graph_sub, output = "visNetwork")
library(magrittr)
# The CRAN PACKAGES index holds one DCF record per package.
con <- url("http://cran.r-project.org/src/contrib/PACKAGES")
# Parse the records into a data frame: 1 row = 1 package = 1 node.
ndf_pack <- read.dcf(con, all = TRUE)
close(con)
# Add the `nodes` and `label` columns required of a node data frame.
ndf_pack <- dplyr::mutate(ndf_pack, nodes = Package, label = Package)
# Quick check
str(ndf_pack, width = 100, strict.width = "cut")
## 'data.frame': 8718 obs. of 16 variables: ## $ Package : chr "A3" "abbyyR" "abc" "ABCanalysis" ... ## $ Version : chr "1.0.0" "0.5.0" "2.1" "1.1.1" ... ## $ Depends : chr "R (>= 2.15.0), xtable, pbapply" "R (>= 3.2.0)" "R (>= 2.10), abc.".. ## $ Suggests : chr "randomForest, e1071" "testthat, rmarkdown, knitr (>= 1.11)" NA NA .. ## $ License : chr "GPL (>= 2)" "MIT + file LICENSE" "GPL (>= 3)" "GPL-3" ... ## $ NeedsCompilation : chr "no" "no" "no" "no" ... ## $ Imports : chr NA "httr, XML, curl, readr, progress" NA "Hmisc, plotrix" ... ## $ LinkingTo : chr NA NA NA NA ... ## $ Enhances : chr NA NA NA NA ... ## $ License_restricts_use: chr NA NA NA NA ... ## $ OS_type : chr NA NA NA NA ... ## $ Priority : chr NA NA NA NA ... ## $ License_is_FOSS : chr NA NA NA NA ... ## $ Archs : chr NA NA NA NA ... ## $ nodes : chr "A3" "abbyyR" "abc" "ABCanalysis" ... ## $ label : chr "A3" "abbyyR" "abc" "ABCanalysis" ...
For the Imports Dependencies
# Build an edge data frame from one dependency field of the package table.
#
# Args:
#   ndf:   package data frame with a `Package` column and the column named
#          by `field`.
#   field: name of the comma-separated dependency column, e.g. "Imports".
#   rel:   value stored in the `rel` column of every edge.
#   color: value stored in the `color` column of every edge.
# Returns: a data.frame with character columns from, to, rel, color; one row
#   per (package, dependency) pair. Packages whose field is NA contribute
#   no rows.
dependency_edges <- function(ndf, field, rel, color) {
  # Split on commas with any surrounding whitespace (including line breaks).
  # There is no cap on the number of entries per package.
  targets <- strsplit(as.character(ndf[[field]]),
                      "[[:space:]]*,[[:space:]]*")
  from <- rep(as.character(ndf$Package), lengths(targets))
  to <- trimws(unlist(targets, use.names = FALSE))
  # Keep only the package name: drop a version requirement such as
  # "(>= 1.0)", whether or not a space precedes the parenthesis.
  to <- sub("[[:space:](].*$", "", to)
  keep <- !is.na(to) & nzchar(to)
  data.frame(from = from[keep],
             to = to[keep],
             rel = rep(rel, sum(keep)),
             color = rep(color, sum(keep)),
             stringsAsFactors = FALSE)
}

edf_import <- dependency_edges(ndf_pack, "Imports",
                               rel = "Import", color = "DarkRed")
An edf_depends is created in the same way for the Depends field…
# DiagrammeR Package
# Stack the Imports and Depends edges into one edge data frame; like rbind,
# but different columns are allowed.
edf_pack <- DiagrammeR::combine_edges(edf_import, edf_depends)
# Packages returned by gtools::getDependencies() for DiagrammeR, plus
# DiagrammeR itself
nodes_DiagrammeR <- union(
  gtools::getDependencies("DiagrammeR", available = FALSE),
  "DiagrammeR"
)
# EDF & NDF
# stringsAsFactors = FALSE keeps `nodes` character so the join key has the
# same type as ndf_pack$nodes on R versions older than 4.0.
ndf_DiagrammeR <- data.frame(nodes = nodes_DiagrammeR,
                             stringsAsFactors = FALSE) %>%
  dplyr::left_join(ndf_pack, by = "nodes")
edf_DiagrammeR <- dplyr::filter(edf_pack, from %in% nodes_DiagrammeR)
# The Graph
graph_DiagrammeR <- DiagrammeR::create_graph(
  ndf_DiagrammeR,
  edf_DiagrammeR,
  graph_attrs = c("layout = dot", "overlap=FALSE"),
  node_attrs = c("fontname = Helvetica")
)
# Specific Color for the DiagrammeR Node
# NOTE(review): Graphviz normally applies fillcolor only to nodes with
# style = filled -- confirm the node is actually drawn blue.
graph_DiagrammeR <- DiagrammeR::set_node_attr(graph_DiagrammeR,
                                              nodes = c("DiagrammeR"),
                                              node_attr = "fillcolor",
                                              values = "blue")
# dot: flows the directed graph in the direction of rank
DiagrammeR::render_graph(graph_DiagrammeR, output = "graph")
# twopi: concentric circles
graph_DiagrammeR <- DiagrammeR::set_global_graph_attr(
  graph_DiagrammeR, "graph", "layout", "twopi"
)
DiagrammeR::render_graph(graph_DiagrammeR, output = "graph")
# neato: attempts to minimize a global energy function
graph_DiagrammeR <- DiagrammeR::set_global_graph_attr(
  graph_DiagrammeR, "graph", "layout", "neato"
)
DiagrammeR::render_graph(graph_DiagrammeR, output = "graph")
# circo layout
graph_DiagrammeR <- DiagrammeR::set_global_graph_attr(
  graph_DiagrammeR, "graph", "layout", "circo"
)
DiagrammeR::render_graph(graph_DiagrammeR, output = "graph")
## R version 3.3.1 (2016-06-21) ## Platform: x86_64-w64-mingw32/x64 (64-bit) ## Running under: Windows 7 x64 (build 7601) Service Pack 1 ## ## locale: ## [1] LC_COLLATE=German_Germany.1252 LC_CTYPE=German_Germany.1252 ## [3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C ## [5] LC_TIME=German_Germany.1252 ## ## attached base packages: ## [1] stats graphics grDevices utils datasets methods base ## ## other attached packages: ## [1] dplyr_0.4.3 magrittr_1.5 DiagrammeR_0.8.2 ## ## loaded via a namespace (and not attached): ## [1] Rcpp_0.12.5 visNetwork_1.0.1 assertthat_0.1 ## [4] digest_0.6.9 R6_2.1.2 plyr_1.8.4 ## [7] DBI_0.4-1 jsonlite_1.0 formatR_1.4 ## [10] evaluate_0.9 scales_0.4.0 stringi_1.1.1 ## [13] lazyeval_0.2.0 rstudioapi_0.6 rmarkdown_0.9.6.14 ## [16] tools_3.3.1 stringr_1.0.0 htmlwidgets_0.6 ## [19] munsell_0.4.3 parallel_3.3.1 yaml_2.1.13 ## [22] colorspace_1.2-6 htmltools_0.3.5 knitr_1.13