library(dlstats)
library(feedeR)
library(ggthemes)
library(httr)
library(jsonlite)
library(lubridate)
library(scales)
library(stringr)
library(tidytext)
library(tidyverse)
library(wordcloud)

ggplot_scale <- c("navyblue", "tan4", "chartreuse4", "blueviolet", "blue", "deeppink4",
                  "goldenrod", "violetred", "turquoise2", "lightgreen", "lightpink1", "yellow1",
                  "slategrey", "peachpuff2", "mediumorchid4", "bisque4", "darkolivegreen4", "azure2")
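
scales is already loaded, so show_col() gives a quick preview of this hand-picked palette (optional, purely for eyeballing the colours):

show_col(ggplot_scale)  # draw a swatch grid of the custom palette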

Libraries.io

Repo search by keyword

Repos across package managers containing the term “onnx”

# keep a local copy of the Atom search results; feed.extract() fetches and parses the feed straight from the URL
download.file("https://libraries.io/search.atom?order=desc&q=onnx&sort=dependent_repos_count", "libsio_dep_repos.rss.atom")
libsio_rss <- feed.extract("https://libraries.io/search.atom?q=%22onnx%22&sort=dependent_repos_count")


libsio_dep_repos <- libsio_rss$items %>%
  filter(link != "text/html", link != "alternate") %>%
  separate(link, c("url", "libsio"), sep="https://libraries.io/", extra="merge", fill="left") %>%
  select(title, libsio)

saveRDS(libsio_dep_repos, "onnx_repos")
# get info on each repo in list

query_params <- list(
  api_key=params$libsio_api_key
)
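
params$libsio_api_key is supplied through the params field of the R Markdown header. When running interactively, an alternative (a sketch; LIBRARIES_IO_API_KEY is an arbitrary name, not anything the API requires) is to keep the key in an environment variable:

query_params <- list(
  api_key = Sys.getenv("LIBRARIES_IO_API_KEY")  # assumes the key was exported beforehand
)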

# Fetch one project's metadata from the Libraries.io API and flatten it into a
# single-row data frame (the nested versions table is dropped, keywords collapsed)
get_libsio_repos <- function (url, query) {
  req <- GET(url, query=query)
  print(url)
  json <- content(req, as = "text")
  repo_json <- fromJSON(json, flatten=TRUE)
  repo_json$versions <- NULL                # drop the nested versions table
  repo_json_keywords <- repo_json$keywords  # set keywords aside before flattening
  repo_json$keywords <- NULL
  repo <- repo_json %>%
    unlist() %>%
    as.data.frame.list()
  if (length(repo_json_keywords) != 0) {
    # collapse the keyword vector into a single comma-separated string
    repo$keywords <- paste(repo_json_keywords, collapse=",")
  }
  return(repo)
}
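
As a quick check, the function can be run for a single project first; the pypi/onnx path below is the top hit from the search results:

onnx_pypi <- get_libsio_repos("https://libraries.io/api/pypi/onnx", query_params)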

repos <- tibble()
for (n in 1:nrow(libsio_dep_repos)) {
  print(paste(n))
  url <- paste0("https://libraries.io/api/", libsio_dep_repos$libsio[[n]])
  repo <- get_libsio_repos(url, query_params)
  repos <- bind_rows(repos, repo)
  saveRDS(repos, paste0("libsio/_repo_", n, ".rds"))  # checkpoint after every request
}
## [1] "1"
## [1] "https://libraries.io/api/pypi/onnx"
## [1] "2"
## [1] "https://libraries.io/api/pypi/onnxmltools"
## [1] "3"
## [1] "https://libraries.io/api/npm/onnxjs"
## [1] "4"
## [1] "https://libraries.io/api/pypi/onnx-mxnet"
## [1] "5"
## [1] "https://libraries.io/api/nuget/Microsoft.ML.Scoring"
## [1] "6"
## [1] "https://libraries.io/api/rubygems/menoh"
## [1] "7"
## [1] "https://libraries.io/api/hackage/menoh"
## [1] "8"
## [1] "https://libraries.io/api/pypi/onnx-tf"
## [1] "9"
## [1] "https://libraries.io/api/pypi/winmltools"
## [1] "10"
## [1] "https://libraries.io/api/pypi/onnx-coreml"
## [1] "11"
## [1] "https://libraries.io/api/hex/onnxs"
## [1] "12"
## [1] "https://libraries.io/api/pypi/turingnetwork"
## [1] "13"
## [1] "https://libraries.io/api/cargo/tract-onnx"
## [1] "14"
## [1] "https://libraries.io/api/pypi/onnx-chainer"
## [1] "15"
## [1] "https://libraries.io/api/nuget/Microsoft.ML.OnnxRuntime.Gpu"
## [1] "16"
## [1] "https://libraries.io/api/nuget/Microsoft.ML.Onnx"
## [1] "17"
## [1] "https://libraries.io/api/pypi/ngraph-onnx"
## [1] "18"
## [1] "https://libraries.io/api/pypi/tf2onnx"
## [1] "19"
## [1] "https://libraries.io/api/cargo/onnx"
## [1] "20"
## [1] "https://libraries.io/api/cran/onnx"
## [1] "21"
## [1] "https://libraries.io/api/nuget/Aiinfra.OnnxRuntime.Gpu"
## [1] "22"
## [1] "https://libraries.io/api/pypi/onnxruntime-gpu"
## [1] "23"
## [1] "https://libraries.io/api/pypi/onnxruntime"
## [1] "24"
## [1] "https://libraries.io/api/pypi/flashlight"
## [1] "25"
## [1] "https://libraries.io/api/rubygems/runx"
## [1] "26"
## [1] "https://libraries.io/api/pypi/git-feature"
## [1] "27"
## [1] "https://libraries.io/api/npm/tfjs-onnx"
## [1] "28"
## [1] "https://libraries.io/api/pypi/onnx-caffe2"
## [1] "29"
## [1] "https://libraries.io/api/nuget/Xam.Plugins.CustomVision"
## [1] "30"
## [1] "https://libraries.io/api/cargo/tract"
# unlist() + as.data.frame.list() coerced every field to character/factor,
# so convert the counts back to numbers
repos <- repos %>%
  mutate(stars=as.numeric(as.character(stars)),
         forks=as.numeric(as.character(forks)))

saveRDS(repos, "onnx_libsio_repos.rds")
repos <- readRDS("onnx_libsio_repos.rds")

Platforms

repos <- repos %>%
  mutate(last_release_days = as.numeric(today() - as.Date(latest_release_published_at)))

ggplot(repos, aes(x=reorder(name, -last_release_days), y=last_release_days)) +
  geom_bar(aes(fill=platform), stat="identity", position="dodge") +
  coord_flip() +
  xlab('Repo') + ylab('Days since last release') + theme_few() +
  ggtitle('Libraries.io ONNX Repos') +
  scale_fill_manual(values=ggplot_scale)

Description keywords

description_keywords <- repos %>%
  mutate(description=str_replace(description, "'s", "")) %>%
  select(name, platform, description) %>%
  unnest_tokens(word, description) %>%
  anti_join(stop_words, by="word") %>%
  count(platform, word, sort = TRUE)

desc_tfidf <- description_keywords %>% 
  bind_tf_idf(word, platform, n) %>% 
  arrange(-tf_idf) %>%
  mutate(tf_idf_rnd = round(tf_idf, 2)*100)

desc_summary <- desc_tfidf %>% filter(tf_idf_rnd > 0)

wordcloud(desc_summary$word, desc_summary$tf_idf_rnd, scale=c(5,.8), min.freq=1, colors = ggplot_scale, random.color = TRUE)
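
Each package-manager platform is treated as a "document" here, so tf-idf favours the words that set one platform's package descriptions apart from the rest. A quick sketch (not required for the wordcloud) to list the top distinguishing terms per platform:

desc_tfidf %>%
  group_by(platform) %>%
  top_n(3, tf_idf) %>%    # three highest-weighted words per platform (plus ties)
  ungroup() %>%
  arrange(platform, -tf_idf)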

Downloads

PyPI download data - https://bigquery.cloud.google.com/savedquery/306220071795:f014c52a64d04961a1901a65e686207b

download.file("https://storage.cloud.google.com/open_source_community_metrics_exports/qiskit/20190131_pypi_downloads_000000000000.csv?_ga=2.75282194.-1094950915.1548948059", destfile = "qiskit_pypi_downloads.json")
# Google's JSON isn't playing well with jsonlite

pypi_downloads <- read_json("qiskit_pypi_downloads.json")
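
If the export is newline-delimited JSON (one record per line, which is how BigQuery writes JSON exports), read_json() will fail because the file is not a single JSON document; jsonlite::stream_in() is designed for that format:

# Assumes the export really is newline-delimited JSON rather than one document
pypi_downloads <- stream_in(file("qiskit_pypi_downloads.json"))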