library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the crawler's JSON dump and pull out the per-node result table.
metrics <- jsonlite::fromJSON(
  "../output_data_crawls/ipfs/visitedPeers_2024-06-03--17-53-58.json"
)
crawl_metrics <- metrics$found_nodes$result

# Parse the begin/end timestamps and derive how long each crawl took.
# The unit is pinned to seconds: plain `end - begin` on date-times lets R
# pick secs/mins/hours/days from the data, which would silently change the
# meaning of the numbers once the difftime is converted with as.numeric().
crawl_stats <- crawl_metrics |>
  mutate(
    crawl_end_ts = parse_datetime(crawl_end_ts),
    crawl_begin_ts = parse_datetime(crawl_begin_ts),
    response_time = difftime(crawl_end_ts, crawl_begin_ts, units = "secs")
  )
Response Times
# Histogram of crawl response times; rows with no response time are dropped.
# The difftime is converted explicitly to seconds so the values always match
# the axis label, whatever unit the difftime happens to be stored in.
crawl_stats |>
  filter(!is.na(response_time)) |>
  ggplot(aes(x = as.numeric(response_time, units = "secs"))) +
  geom_histogram(bins = 30, colour = "black") +
  labs(x = "response time (seconds)", y = "frequency")

# Summary statistics of the response times, ignoring rows where no response
# time is available. The conversion names the unit explicitly, so the table
# is in seconds even if the difftime is stored in minutes or hours.
summ <- crawl_stats |>
  filter(!is.na(response_time)) |>
  pull(response_time) |>
  as.numeric(units = "secs") |>
  summary()

# Render the named summary vector as a two-column table.
knitr::kable(
  data.frame(
    statistic = names(summ),
    time = as.numeric(summ)
  )
)
| statistic | time |
|:----------|-----------:|
| Min. | 0.0534117 |
| 1st Qu. | 1.8305487 |
| Median | 3.6660675 |
| Mean | 8.2265389 |
| 3rd Qu. | 8.7093343 |
| Max. | 65.1360004 |
Agent Info
# Percentage of crawled nodes per reported agent version, largest share
# first, shown as an interactive table. Nodes with no agent version are
# left out of both the counts and the percentage base.
crawl_stats |>
  filter(!is.na(agent_version)) |>
  group_by(agent_version) |>
  summarise(n = n()) |>
  mutate(percent = n / sum(n) * 100) |>
  arrange(desc(percent)) |>
  select(!n) |>
  DT::datatable()