net income

income_all <- purchases %>%
  group_by(year) %>%
  summarise(
    revenue = sum(Payment, na.rm = TRUE), 
    .groups = "drop") %>%
  left_join(
    promos %>%
      group_by(Year) %>%
      summarise(
        promo_cost = sum(Cost, na.rm = TRUE), 
        .groups = "drop") %>%
      rename(year = Year),
    by = "year"
  ) %>%
  mutate(net_calc = revenue - promo_cost) %>%
  left_join(
    income %>%
      rename(
        year = year,
        net_statement = Net_income
      ),
    by = "year"
  ) %>%
  mutate(
    diff   = net_calc - net_statement,
    margin = net_calc / revenue
  )
income_all <- income_all %>%
  mutate(
    diff = net_calc - net_statement,
    match_flag = case_when(
      diff == 0 ~ "Perfect match",
      diff > 0~ "Calculated higher",
      TRUE ~ "Statement higher"
    )
  )

ggplotly(
  income_all %>%
    select(year, net_calc, net_statement) %>%
    pivot_longer(
      cols = c(net_calc, net_statement),
      names_to = "source",
      values_to = "net_income"
    ) %>%
    ggplot(
      aes(
        x = factor(year),
        y = net_income,
        fill = source,
        text = paste0(
          "Year: ", year,
          " | ",
          ifelse(source == "net_calc", "Calculated: ", "Income statement: "),
          label_comma()(net_income)
        )
      )
    ) +
    geom_col(position = "dodge", width = 0.6) +
    scale_y_continuous(labels = label_comma()) +
    scale_fill_manual(
      values = c(
        "net_calc" = "#ED3E61",
        "net_statement" = "#68A1F7"
      ),
      labels = c(
        "net_calc" = "Calculated net income",
        "net_statement"  = "Income statement net income"
      ),
      name = NULL
    ) +
    labs(
      x  = "Year",
      y  = "Net income",
      title = "Calculated vs Reported Net Income"
    ),
  tooltip = "text"
)