library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the crawl output and take the `found_nodes$result` table from it.
metrics <- jsonlite::fromJSON('../output_data_crawls/ipfs/visitedPeers_2024-06-03--17-53-58.json')
crawl_metrics <- metrics$found_nodes$result

# Parse the begin/end timestamps and derive a response time for every row.
crawl_stats <- crawl_metrics |>
  mutate(
    crawl_end_ts = parse_datetime(crawl_end_ts),
    crawl_begin_ts = parse_datetime(crawl_begin_ts),
    # Pin the units to seconds. Subtracting date-times with `-` lets difftime
    # choose its units from the data (secs, mins, hours, ...), so the numeric
    # values would silently change scale if every difference exceeded a minute.
    response_time = difftime(crawl_end_ts, crawl_begin_ts, units = "secs")
  )

Response Times

# Histogram of response times, restricted to rows that have one.
ggplot(crawl_stats |> filter(!is.na(response_time))) +
  # Convert explicitly to seconds so the plotted values always agree with the
  # axis label, whatever units the difftime column happens to carry.
  geom_histogram(
    aes(x = as.numeric(response_time, units = "secs")),
    bins = 30,
    col = 'black'
  ) +
  xlab('response time (seconds)') +
  ylab('frequency')

# Five-number summary (plus mean) of the non-missing response times.
summ <- crawl_stats |>
  filter(!is.na(response_time)) |>
  pull(response_time) |>
  # Convert explicitly to seconds; a bare as.numeric() would return the values
  # in whatever units the difftime was created with.
  as.numeric(units = "secs") |>
  summary()

# Render the summary as a two-column table: statistic name and its value.
knitr::kable(data.frame(
  statistic = names(summ),
  time = as.numeric(summ)
))
statistic time
Min. 0.0534117
1st Qu. 1.8305487
Median 3.6660675
Mean 8.2265389
3rd Qu. 8.7093343
Max. 65.1360004

Agent Info

# Share of each agent version among rows that report one, shown as an
# interactive table sorted from most to least common.
crawl_stats |>
  filter(!is.na(agent_version)) |>
  group_by(agent_version) |>
  # One row per agent version with its row count; grouping is dropped here.
  summarise(n = n(), .groups = "drop") |>
  # `.keep = "unused"` drops `n` once it has been used to compute the share,
  # leaving only `agent_version` and `percent`.
  mutate(percent = n / sum(n) * 100, .keep = "unused") |>
  arrange(desc(percent)) |>
  DT::datatable()