2020 network

Simple scrape of the OHDSI data-network wiki page, with the rows re-ordered by size (number of patients)

#install.packages('janitor')
library(janitor)

library(rvest)
library(tidyverse)
library(DT)

# Scrape the 2020 OHDSI data-network wiki page and keep its first HTML table
# in `d`.
url <- "https://www.ohdsi.org/web/wiki/doku.php?id=resources:2020_data_network"

content <- read_html(url)
tables <- content %>% html_table(fill = TRUE)

d <- tables[[1]]

# Fix columns: the first row of the parsed table is used as the source of the
# column names.
cols <- d %>%
  slice(1) %>%
  unlist() %>%
  unname()

# Reduce cols (only 8 have actual data), then name them from `cols`.
d <- d %>% select(1:8)
names(d) <- cols[1:8]

# NOTE: explicit `d <- d %>% ...` is used instead of magrittr's `%<>%`.
# None of the packages attached in this notebook (janitor, rvest, tidyverse,
# DT) export `%<>%`, so the compound-assignment pipe fails in a fresh session
# unless magrittr happens to be loaded already.
d <- d %>%
  # Normalise the scraped headers to snake_case identifiers.
  clean_names() %>%
  # Drop row 1, which holds the column names; `slice(-1)` also behaves
  # correctly when the table contains nothing but that row.
  slice(-1) %>%
  # Make the number nicer: strip thousands separators and convert to numeric.
  # Some entries are still problematic; they become NA (with a coercion
  # warning) rather than being silently guessed.
  mutate(pt_cnt = as.numeric(str_replace_all(number_of_patients, ",", ""))) %>%
  # Largest data sources first.
  arrange(desc(pt_cnt))
NAs introduced by coercion
# Remove the `number` column (the first column): no value is provided in it.
d[["number"]] <- NULL

List

# Show the sorted 2020 table as an interactive widget (before `year` is added).
datatable(d)
# Tag every row with its network year and keep a copy for the combined table.
d <- mutate(d, year = 2020)
d20 <- d

2019

# Scrape the 2019 OHDSI data-network wiki page and keep its first HTML table
# in `d`.
url <- "https://www.ohdsi.org/web/wiki/doku.php?id=resources:2019_data_network"
content <- read_html(url)
tables <- content %>% html_table(fill = TRUE)

d <- tables[[1]]

# The header-repair steps used for the 2020 table are deliberately left
# disabled here (presumably the 2019 table already parses with usable column
# names -- confirm if the wiki page changes):
# cols <- d %>% slice(1) %>% unlist() %>% unname()
# d <- d %>% select(1:8)   # reduce cols (only 8 have actual data)
# names(d)
# d <- d %>% slice(-1)     # drop row 1 with col names

# NOTE: explicit `d <- d %>% ...` is used instead of magrittr's `%<>%`.
# None of the packages attached in this notebook (janitor, rvest, tidyverse,
# DT) export `%<>%`, so the compound-assignment pipe fails in a fresh session
# unless magrittr happens to be loaded already.
d <- d %>%
  # Normalise the scraped headers to snake_case identifiers.
  clean_names() %>%
  # Make the number nicer: strip thousands separators and convert to numeric.
  # Some entries are still problematic; they become NA (with a coercion
  # warning).
  mutate(pt_cnt = as.numeric(str_replace_all(number_of_patients, ",", ""))) %>%
  # Largest data sources first.
  arrange(desc(pt_cnt))
NAs introduced by coercion
# Remove the `number` column (the first column): no value is provided in it.
d[["number"]] <- NULL

List



# Show the sorted 2019 table as an interactive widget (before `year` is added).
datatable(d)
# Tag every row with its network year and keep a copy.
d <- mutate(d, year = 2019)
d19 <- d
# Stack both years; newest year first, and within a year largest source first.
alld <- arrange(bind_rows(d20, d19), desc(year), desc(pt_cnt))
# Persist the combined table next to the notebook.
write_csv(alld, "ohdsi-data-network-combined.csv")

Combined years

# Interactive view of both years together, without the row-name column.
datatable(alld, rownames = FALSE)

Analysis by dataset type

# Count data sources per year and data type, most common type first within
# each year. `.groups = "drop"` returns an ungrouped result and avoids the
# "summarise() has grouped output by 'year'" message; the table is unchanged.
alld %>%
  group_by(year, data_type) %>%
  summarize(n = n(), .groups = "drop") %>%
  arrange(year, desc(n)) %>%
  datatable(rownames = FALSE)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IA0KICBodG1sX25vdGVib29rOiANCiAgICB0b2M6IHllcw0KLS0tDQoNCiMgMjAyMCBuZXR3b3JrDQoNClNpbXBsZSBzY3JhcGUgb2YgVVJMIGFuZCByZS1vcmRlcmluZyBvZiB0aGUgcm93cyBieSBzaXplDQoNCmBgYHtyfQ0KI2luc3RhbGwucGFja2FnZXMoJ2phbml0b3InKQ0KbGlicmFyeShqYW5pdG9yKQ0KDQpsaWJyYXJ5KHJ2ZXN0KQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KERUKQ0KDQojc2NyYXBlIHRoZSB0YWJsZSBmcm9tIHVybCBpbnRvIGQgdmFyaWFibGUNCnVybD0naHR0cHM6Ly93d3cub2hkc2kub3JnL3dlYi93aWtpL2Rva3UucGhwP2lkPXJlc291cmNlczoyMDIwX2RhdGFfbmV0d29yaycNCg0KY29udGVudCA8LSByZWFkX2h0bWwodXJsKQ0KdGFibGVzIDwtIGNvbnRlbnQgJT4lIGh0bWxfdGFibGUoZmlsbCA9IFRSVUUpDQoNCg0KZCA8LSB0YWJsZXNbWzFdXQ0KDQoNCiNmaXggY29sdW1ucw0KDQpjb2xzPWQgICU+JSBzbGljZSgxKSAlPiUgdW5saXN0KCkgJT4lIHVubmFtZSgpDQoNCg0KDQojcmVkdWNlIGNvbHMgKG9ubHkgIDggaGF2ZSBhY3R1YWwgZGF0YSkNCmQgJTw+JSAgc2VsZWN0KDE6OCkgDQoNCg0KDQojYWRkIG5hbWVzDQpuYW1lcyhkKT1jb2xzWzE6OF0NCiNuYW1lcyhkKQ0KDQoNCmQgJTw+JSBjbGVhbl9uYW1lcygpDQojZHJvcCByb3cgMSB3aXRoIGNvbCBuYW1lcw0KZCAlPD4lIHNsaWNlKDI6bigpKQ0KDQojbWFrZSB0aGUgbnVtYmVyIG5pY2VyIChyZXBsYWNlIGNvbW1hIHdpdGggbm90aGluZyBhbmQgY29udmVydCB0byBudW1iZXIpDQojc3RpbGwgaGF2ZSBzb21lIHByb2JsZW1hdGljIGVudHJpZXMNCmQgJTw+JSBtdXRhdGUocHRfY250PWFzLm51bWVyaWMoc3RyX3JlcGxhY2VfYWxsKG51bWJlcl9vZl9wYXRpZW50cywnLCcsJycpKSkgJT4lIGFycmFuZ2UoZGVzYyhwdF9jbnQpKQ0KDQojZHJvcCBmaXJzdCBjb2x1bW4sIG5vIHZhbHVlIHByb3ZpZGVkDQpkJG51bWJlciA9IE5VTEwNCg0KYGBgDQoNCiMjIExpc3QNCmBgYHtyfQ0KZCAlPiUgZGF0YXRhYmxlKCkNCmQkeWVhcj0yMDIwDQpkMjA9ZA0KYGBgDQojIDIwMTkNCmBgYHtyfQ0KdXJsPSdodHRwczovL3d3dy5vaGRzaS5vcmcvd2ViL3dpa2kvZG9rdS5waHA/aWQ9cmVzb3VyY2VzOjIwMTlfZGF0YV9uZXR3b3JrJw0KY29udGVudCA8LSByZWFkX2h0bWwodXJsKQ0KdGFibGVzIDwtIGNvbnRlbnQgJT4lIGh0bWxfdGFibGUoZmlsbCA9IFRSVUUpDQoNCg0KZCA8LSB0YWJsZXNbWzFdXQ0KDQoNCiNmaXggY29sdW1ucw0KDQojY29scz1kICAlPiUgc2xpY2UoMSkgJT4lIHVubGlzdCgpICU+JSB1bm5hbWUoKQ0KDQoNCg0KI3JlZHVjZSBjb2xzIChvbmx5ICA4IGhhdmUgYWN0dWFsIGRhdGEpDQojZCAlPD4lICBzZWxlY3QoMTo4KSANCg0KDQoNCiNhZGQgbmFtZXMNCm5hbWVzKGQpPWNvbHNbMTo4XQ0KI25hbWVzKGQpDQoNCg0KZCAlPD4lIGNsZWFuX25hbWVzKCkNCiNkcm9w
IHJvdyAxIHdpdGggY29sIG5hbWVzDQojZCAlPD4lIHNsaWNlKDI6bigpKQ0KDQojbWFrZSB0aGUgbnVtYmVyIG5pY2VyIChyZXBsYWNlIGNvbW1hIHdpdGggbm90aGluZyBhbmQgY29udmVydCB0byBudW1iZXIpDQojc3RpbGwgaGF2ZSBzb21lIHByb2JsZW1hdGljIGVudHJpZXMNCmQgJTw+JSBtdXRhdGUocHRfY250PWFzLm51bWVyaWMoc3RyX3JlcGxhY2VfYWxsKG51bWJlcl9vZl9wYXRpZW50cywnLCcsJycpKSkgJT4lIGFycmFuZ2UoZGVzYyhwdF9jbnQpKQ0KDQojZHJvcCBmaXJzdCBjb2x1bW4sIG5vIHZhbHVlIHByb3ZpZGVkDQpkJG51bWJlciA9IE5VTEwNCmBgYA0KIyMgTGlzdA0KYGBge3J9DQoNCg0KZCAlPiUgZGF0YXRhYmxlKCkNCmQkeWVhcj0yMDE5DQpkMTk9ZA0KYWxsZD1iaW5kX3Jvd3MoZDIwLGQxOSkgJT4lIGFycmFuZ2UoZGVzYyh5ZWFyKSxkZXNjKHB0X2NudCkpDQphbGxkICU+JSB3cml0ZV9jc3YoJ29oZHNpLWRhdGEtbmV0d29yay1jb21iaW5lZC5jc3YnKQ0KYGBgDQoNCiMgQ29tYmluZWQgeWVhcnMNCmBgYHtyfQ0KYWxsZCAlPiUgZGF0YXRhYmxlKHJvd25hbWVzID0gRkFMU0UpDQpgYGANCiMgQW5hbHlzaXMgYnkgZGF0YXNldCB0eXBlDQpgYGB7cn0NCmFsbGQgJT4lIGdyb3VwX2J5KHllYXIsZGF0YV90eXBlKSAlPiUgc3VtbWFyaXplKG49bigpKSAlPiUgYXJyYW5nZSh5ZWFyLGRlc2MobikpICU+JSBkYXRhdGFibGUocm93bmFtZXMgPSBGQUxTRSkNCmBgYA0KDQoNCg0K