library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(cranlogs)
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:dplyr':
##
## combine
##
## The following object is masked from 'package:ggplot2':
##
## margin
# Short alias for the random forest constructor, used in the model calls below.
rf <- randomForest::randomForest

# Download counts from 2021-01-01 up to the most recent day available,
# converted to a tibble (one row per date, with a `count` column).
data <- cran_downloads(from = "2021-01-01", to = "last-day") |>
  as_tibble()
data
## # A tibble: 1,471 × 2
## date count
## <date> <dbl>
## 1 2021-01-01 2110263
## 2 2021-01-02 2603171
## 3 2021-01-03 2604837
## 4 2021-01-04 4493448
## 5 2021-01-05 4626637
## 6 2021-01-06 4788697
## 7 2021-01-07 4732265
## 8 2021-01-08 4302291
## 9 2021-01-09 2754254
## 10 2021-01-10 2748909
## # ℹ 1,461 more rows
# Raw download counts over time.
data |>
  ggplot(aes(x = date, y = count)) +
  geom_line()

# Derive calendar features from `date`; these are the predictors used by the
# models fitted further down.
data <- mutate(
  data,
  y = year(date),                     # calendar year
  m = month(date),                    # month number
  m_lbl = month(date, label = TRUE),  # month as a label
  d = day(date),                      # day of month
  dw = weekdays(date)                 # weekday name
)

# Baseline model: download count explained by calendar year only.
mod <- lm(count ~ y, data = data)
#' Plot observed counts together with a model's predictions
#'
#' `predict(mod)` is called without `newdata`, so the predictions are the ones
#' the model produces for the data it was fitted on. `data` must therefore be
#' that same data, with rows in the same order.
#'
#' @param data Data frame with `date` and `count` columns.
#' @param mod Fitted model object with a `predict()` method.
#' @return A ggplot object: observed counts in grey, predictions in dark red.
plot_data_mod <- function(data, mod) {
  with_pred <- mutate(data, pred = predict(mod))

  ggplot(with_pred, aes(date)) +
    geom_line(aes(y = count), color = "gray70") +
    geom_line(aes(y = pred), color = "darkred")
}
# Fit of increasingly rich linear models against the observed series,
# one plot per model.

# Calendar year only.
plot_data_mod(data, lm(count ~ y, data = data))

# Add the month number.
plot_data_mod(data, lm(count ~ y + m, data = data))

# Add the month label (`m_lbl`).
plot_data_mod(data, lm(count ~ y + m + m_lbl, data = data))

# Add the day of month.
plot_data_mod(data, lm(count ~ y + m + m_lbl + d, data = data))

# Add the weekday name.
plot_data_mod(data, lm(count ~ y + m + m_lbl + d + dw, data = data))

# Same predictors with a random forest. The fit is randomised, so the seed is
# fixed to make the plot reproducible between runs.
set.seed(2021)
plot_data_mod(data, rf(count ~ y + m + m_lbl + d + dw, data = data))

#' Plot observed counts, model predictions and a forecast for a date window
#'
#' Builds a daily date grid from `from` to `to`, derives the calendar features
#' `y`, `m`, `m_lbl`, `d` and `dw` from it, predicts on that grid with `mod`,
#' and draws the forecast as a blue line on top of the plot returned by
#' `plot_data_mod()`.
#'
#' @param data Data frame with `date` and `count` columns, passed on to
#'   `plot_data_mod()`.
#' @param mod Fitted model whose `predict()` method accepts `newdata`.
#' @param from,to First and last day of the forecast window; anything
#'   `as.Date()` can convert. The defaults reproduce the window that was
#'   previously hard-coded.
#' @return A ggplot object.
plot_data_mod_fut <- function(data, mod,
                              from = "2025-01-01",
                              to = "2025-04-01") {
  from <- as.Date(from)
  to <- as.Date(to)
  # Fail early with a clear message instead of an obscure seq() error.
  stopifnot(
    length(from) == 1L, length(to) == 1L,
    !is.na(from), !is.na(to),
    from <= to
  )

  dfut <- tibble(date = seq(from = from, to = to, by = "day"))

  # The feature columns must match the ones the model was trained on.
  dfut <- dfut |>
    mutate(
      y = year(date),
      m = month(date),
      m_lbl = month(date, label = TRUE),
      d = day(date),
      dw = weekdays(date)
    )

  # Second step on purpose: predict() needs the feature columns to exist in
  # `dfut` before it is passed as `newdata`.
  dfut <- dfut |>
    mutate(pred = predict(mod, newdata = dfut))

  plot_data_mod(data, mod) +
    geom_line(aes(date, pred), data = dfut, color = "blue")
}
# Same sequence of models, now with the forecast drawn for the future window.

# Calendar year only.
plot_data_mod_fut(data, lm(count ~ y, data = data))

# Add the month number.
plot_data_mod_fut(data, lm(count ~ y + m, data = data))

# Add the month label (`m_lbl`).
plot_data_mod_fut(data, lm(count ~ y + m + m_lbl, data = data))

# Add the day of month.
plot_data_mod_fut(data, lm(count ~ y + m + m_lbl + d, data = data))

# Add the weekday name.
plot_data_mod_fut(data, lm(count ~ y + m + m_lbl + d + dw, data = data))

# Same predictors with a random forest. The fit is randomised, so the seed is
# fixed to make the plot reproducible between runs.
set.seed(2021)
plot_data_mod_fut(data, rf(count ~ y + m + m_lbl + d + dw, data = data))
