library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(cranlogs)
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## The following object is masked from 'package:ggplot2':
## 
##     margin
# Short alias for the random forest fitting function.
rf <- randomForest::randomForest

# Fetch the download counts from 2021-01-01 up to "last-day", convert the
# result to a tibble, and print it.
data <- cran_downloads(from = "2021-01-01", to = "last-day") |>
  as_tibble()
data
## # A tibble: 1,471 × 2
##    date         count
##    <date>       <dbl>
##  1 2021-01-01 2110263
##  2 2021-01-02 2603171
##  3 2021-01-03 2604837
##  4 2021-01-04 4493448
##  5 2021-01-05 4626637
##  6 2021-01-06 4788697
##  7 2021-01-07 4732265
##  8 2021-01-08 4302291
##  9 2021-01-09 2754254
## 10 2021-01-10 2748909
## # ℹ 1,461 more rows
# Line chart of the raw counts over time.
data |>
  ggplot(aes(x = date, y = count)) +
  geom_line()

# Derive calendar features from `date` to use as model predictors:
#   y     - year
#   m     - month as a number
#   m_lbl - month as a labelled value (derived from the same month as `m`)
#   d     - day of the month
#   dw    - weekday name
data <- mutate(
  data,
  y = year(date),
  m = month(date),
  m_lbl = month(date, label = TRUE),
  d = day(date),
  dw = weekdays(date)
)

# Baseline: linear model of count on year only.
mod <- lm(count ~ y, data = data)


# Plot the observed counts (gray) with a model's predictions overlaid
# (dark red), both against `date`.
#
# Arguments:
#   data  Data frame with `date` and `count` columns.
#   mod   Fitted model supported by `predict()`.
#
# Returns the ggplot object.
#
# Note: `predict(mod)` is called without `newdata`, so the predictions come
# from the model object itself rather than from `data`; `mod` is therefore
# expected to yield one prediction per row of `data`.
plot_data_mod <- function(data, mod){

  with_pred <- mutate(data, pred = predict(mod))

  ggplot(with_pred, aes(date)) +
    geom_line(aes(y = count), color = "gray70") +
    geom_line(aes(y = pred), color = "darkred")

}

# Fit on the observed period: one plot per model, each formula adding one
# calendar feature to the previous one.

# Year only.
plot_data_mod(data, lm(count ~ y, data = data))

# + month as a number.
plot_data_mod(data, lm(count ~ y + m, data = data))

# + month as a labelled value.
plot_data_mod(data, lm(count ~ y + m + m_lbl, data = data))

# + day of the month.
plot_data_mod(data, lm(count ~ y + m + m_lbl + d, data = data))

# + weekday name.
plot_data_mod(data, lm(count ~ y + m + m_lbl + d + dw, data = data))

# Same predictors as the last linear model, fitted with a random forest.
plot_data_mod(data, rf(count ~ y + m + m_lbl + d + dw, data = data))

# Plot the observed counts and the model's predictions (via `plot_data_mod()`),
# plus the model's predictions for a window of future dates drawn in blue.
#
# Arguments:
#   data  Data frame passed on to `plot_data_mod()`.
#   mod   Fitted model supported by `predict(mod, newdata = ...)`; its
#         predictors must be among the calendar features built below
#         (`y`, `m`, `m_lbl`, `d`, `dw`).
#   from  First day of the forecast window (a Date, or a value `as.Date()`
#         accepts). Defaults to 2025-01-01.
#   to    Last day of the forecast window, same type as `from`; must not be
#         earlier than `from`. Defaults to 2025-04-01.
#
# Returns the ggplot object.
plot_data_mod_fut <- function(data, mod,
                              from = as.Date("2025-01-01"),
                              to = as.Date("2025-04-01")){

  from <- as.Date(from)
  to <- as.Date(to)
  stopifnot(
    length(from) == 1L, length(to) == 1L,
    !is.na(from), !is.na(to),
    from <= to
  )

  # One row per day in the forecast window, with the calendar features
  # named exactly as the model formulas expect them.
  dfut <- tibble(date = seq(from = from, to = to, by = "day")) |>
    mutate(
      y = year(date),
      m = month(date),
      m_lbl = month(date, label = TRUE),
      d = day(date),
      dw = weekdays(date)
    )

  # Predict from the full feature table, then attach the result to it.
  dfut <- dfut |>
    mutate(pred = predict(mod, newdata = dfut))

  plot_data_mod(data, mod) +
    geom_line(aes(date, pred), data = dfut, color = "blue")

}

# Fit plus forecast: the same sequence of models, each also drawn over the
# future window in blue.

# Year only.
plot_data_mod_fut(data, lm(count ~ y, data = data))

# + month as a number.
plot_data_mod_fut(data, lm(count ~ y + m, data = data))

# + month as a labelled value.
plot_data_mod_fut(data, lm(count ~ y + m + m_lbl, data = data))

# + day of the month.
plot_data_mod_fut(data, lm(count ~ y + m + m_lbl + d, data = data))

# + weekday name.
plot_data_mod_fut(data, lm(count ~ y + m + m_lbl + d + dw, data = data))

# Same predictors as the last linear model, fitted with a random forest.
plot_data_mod_fut(data, rf(count ~ y + m + m_lbl + d + dw, data = data))