library(huggingfaceR)
library(tidyverse)
# Grab all of the models' data; the result is treated below as a list with
# one entry per model
all_models <- hf_search_models()
# Get the field names of every model entry, so entries can be filtered on
# whether they carry a "downloads" field
get_names <- function(x) map(x, names)
all_models_names <- get_names(all_models)
# seq_along() rather than 1:length(): gives an empty index (not c(1, 0))
# when the search returns nothing, so the tibble below stays consistent
id_col <- seq_along(all_models_names)
# Create tibble for pipeline: one row per model, field names as a list-column
all_models_data <- tibble(id_col, all_models_names)
# Get models with data for downloads - or mapping will fail, then pull ids to
# filter on. The membership test runs once per row on the list-column, so
# there is no need to unnest every field name into its own row first.
filter_ids <- all_models_data %>%
  filter(map_lgl(all_models_names, ~ "downloads" %in% .x)) %>%
  pull(id_col)
# Keep only models with downloads info
all_models <- all_models[filter_ids]
# Extract the data we're interested in: build a one-row tibble per model
# entry, then stack all rows into a single tibble.
# The argument is called `entry` (not `model`) so it cannot be shadowed by
# the `model` column created inside tibble().
models_with_downloads <- bind_rows(
  map(all_models, function(entry) {
    tibble(
      model = entry$modelId,
      downloads = entry$downloads,
      task = entry$pipeline_tag,
      sha = entry$sha,
      private = entry$private
    )
  })
)
# Tally models per task, most frequent first. Note this reads the
# `models_with_downloads` object exported by the huggingfaceR package,
# not a variable from the current session.
count(huggingfaceR::models_with_downloads, task, sort = TRUE)
## # A tibble: 30 × 2
## task n
## <chr> <int>
## 1 <NA> 18549
## 2 text-classification 7935
## 3 text-generation 5136
## 4 text2text-generation 4780
## 5 fill-mask 3473
## 6 automatic-speech-recognition 2654
## 7 token-classification 2648
## 8 question-answering 1954
## 9 reinforcement-learning 1621
## 10 translation 1612
## # … with 20 more rows
# Keep only the rows whose task is "text-classification", again reading the
# `models_with_downloads` object exported by the huggingfaceR package.
filter(huggingfaceR::models_with_downloads, task == "text-classification")
## # A tibble: 7,935 × 5
## model downloads task sha private
## <chr> <int> <chr> <chr> <lgl>
## 1 distilbert-base-uncased-finetuned-sst-2-english 8786731 text… 83db… FALSE
## 2 cardiffnlp/twitter-roberta-base-sentiment 1269394 text… b636… FALSE
## 3 cross-encoder/ms-marco-MiniLM-L-12-v2 1104884 text… 97f7… FALSE
## 4 cardiffnlp/twitter-xlm-roberta-base-sentiment 935857 text… f3e3… FALSE
## 5 cardiffnlp/twitter-roberta-base-sentiment-late… 843827 text… 5916… FALSE
## 6 ProsusAI/finbert 764115 text… 5ea6… FALSE
## 7 bhadresh-savani/distilbert-base-uncased-emotion 563660 text… fff2… FALSE
## 8 cross-encoder/ms-marco-TinyBERT-L-2 477985 text… e9ed… FALSE
## 9 daigo/bert-base-japanese-sentiment 368812 text… 51ac… FALSE
## 10 yiyanghkust/finbert-tone 313288 text… 6950… FALSE
## # … with 7,925 more rows