suppressMessages(library(tidyverse))
## Warning: package 'tidyverse' was built under R version 4.0.5
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.4
## Warning: package 'tidyr' was built under R version 4.0.4
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.4
## Warning: package 'forcats' was built under R version 4.0.5
theme_set(theme_light())

suppressMessages(brewing_materials_raw <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-31/brewing_materials.csv"))

brewing_materials_raw %>%
  count(type, wt = month_current, sort = TRUE)
## # A tibble: 12 x 2
##    type                                 n
##    <chr>                            <dbl>
##  1 Total Used                 53559516695
##  2 Total Grain products       44734903124
##  3 Malt and malt products     32697313882
##  4 Total Non-Grain products    8824613571
##  5 Sugar and syrups            6653104081
##  6 Rice and rice products      5685742541
##  7 Corn and corn products      5207759409
##  8 Hops (dry)                  1138840132
##  9 Other                        998968470
## 10 Barley and barley products   941444745
## 11 Wheat and wheat products     202642547
## 12 Hops (used as extracts)       33700888
brewing_filtered <- brewing_materials_raw %>%
  filter(
    type %in% c(
      "Malt and malt products",
      "Sugar and syrups",
      "Hops (dry)"
    ),
    year < 2016,
    !(month == 12 & year %in% 2014:2015)
  ) %>%
  mutate(
    date = paste0(year, "-", month, "-01"),
    date = lubridate::ymd(date)
  )



brewing_filtered %>%
  ggplot(aes(date, month_current, color = type)) +
  geom_point()

brewing_materials <- brewing_filtered %>%
  select(date, type, month_current) %>%
  pivot_wider(
    names_from = type,
    values_from = month_current
  ) %>%
  janitor::clean_names()
brewing_materials %>%
  ggplot(aes(malt_and_malt_products, sugar_and_syrups)) +
  geom_smooth(method = "lm") +
  geom_point()
## `geom_smooth()` using formula 'y ~ x'

library(tidymodels)
## Warning: package 'tidymodels' was built under R version 4.0.4
## -- Attaching packages -------------------------------------- tidymodels 0.1.2 --
## v broom     0.7.7      v recipes   0.1.15
## v dials     0.0.9      v rsample   0.0.8 
## v infer     0.5.3      v tune      0.1.2 
## v modeldata 0.1.0      v workflows 0.2.1 
## v parsnip   0.1.5      v yardstick 0.0.7
## Warning: package 'dials' was built under R version 4.0.3
## Warning: package 'modeldata' was built under R version 4.0.4
## Warning: package 'parsnip' was built under R version 4.0.3
## Warning: package 'recipes' was built under R version 4.0.3
## Warning: package 'rsample' was built under R version 4.0.3
## Warning: package 'tune' was built under R version 4.0.3
## Warning: package 'workflows' was built under R version 4.0.3
## -- Conflicts ----------------------------------------- tidymodels_conflicts() --
## x scales::discard() masks purrr::discard()
## x dplyr::filter()   masks stats::filter()
## x recipes::fixed()  masks stringr::fixed()
## x dplyr::lag()      masks stats::lag()
## x yardstick::spec() masks readr::spec()
## x recipes::step()   masks stats::step()
beer_fit <- lm(sugar_and_syrups ~ 0 + malt_and_malt_products,
  data = brewing_materials
)

summary(beer_fit)
## 
## Call:
## lm(formula = sugar_and_syrups ~ 0 + malt_and_malt_products, data = brewing_materials)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -29985291  -6468052    174001   7364462  23462837 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## malt_and_malt_products 0.205804   0.003446   59.72   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11480000 on 93 degrees of freedom
## Multiple R-squared:  0.9746, Adjusted R-squared:  0.9743 
## F-statistic:  3567 on 1 and 93 DF,  p-value: < 2.2e-16
tidy(beer_fit)
## # A tibble: 1 x 5
##   term                   estimate std.error statistic  p.value
##   <chr>                     <dbl>     <dbl>     <dbl>    <dbl>
## 1 malt_and_malt_products    0.206   0.00345      59.7 5.72e-76
set.seed(123)
beer_boot <- bootstraps(brewing_materials, times = 1e3, apparent = TRUE)

beer_models <- beer_boot %>% mutate(
  model = map(splits, ~lm(sugar_and_syrups ~ 0 + malt_and_malt_products, 
                                             data = .)), 
                     coef_info = map(model, tidy)
                     )

beer_coefs <- beer_models %>%
  unnest(coef_info)
beer_coefs %>%
  ggplot(aes(estimate)) +
  geom_histogram(alpha = 0.7, fill = "cyan3")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

int_pctl(beer_models, coef_info)
## # A tibble: 1 x 6
##   term                   .lower .estimate .upper .alpha .method   
##   <chr>                   <dbl>     <dbl>  <dbl>  <dbl> <chr>     
## 1 malt_and_malt_products  0.199     0.206  0.212   0.05 percentile
beer_aug <- beer_models %>% 
  sample_n(200) %>%
  mutate(augmented = map(model, augment))%>%
  unnest(augmented)

beer_aug
## # A tibble: 18,800 x 12
##    splits    id      model coef_info  sugar_and_syrups malt_and_malt_pr~ .fitted
##    <list>    <chr>   <lis> <list>                <dbl>             <dbl>   <dbl>
##  1 <rsplit ~ Bootst~ <lm>  <tibble [~         71341108         384396702  7.78e7
##  2 <rsplit ~ Bootst~ <lm>  <tibble [~         76728131         405393784  8.20e7
##  3 <rsplit ~ Bootst~ <lm>  <tibble [~         73793509         322480722  6.53e7
##  4 <rsplit ~ Bootst~ <lm>  <tibble [~         85703037         340319408  6.89e7
##  5 <rsplit ~ Bootst~ <lm>  <tibble [~         67266337         380521275  7.70e7
##  6 <rsplit ~ Bootst~ <lm>  <tibble [~         81199989         388639443  7.86e7
##  7 <rsplit ~ Bootst~ <lm>  <tibble [~         76115769         399504457  8.08e7
##  8 <rsplit ~ Bootst~ <lm>  <tibble [~         66002563         321371392  6.50e7
##  9 <rsplit ~ Bootst~ <lm>  <tibble [~         85703037         340319408  6.89e7
## 10 <rsplit ~ Bootst~ <lm>  <tibble [~         74805384         351222725  7.11e7
## # ... with 18,790 more rows, and 5 more variables: .resid <dbl>, .hat <dbl>,
## #   .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>
ggplot(beer_aug, aes(malt_and_malt_products, sugar_and_syrups)) +
  geom_line(aes(y = .fitted, group = id), alpha = .2, col = "cyan3") +
  geom_point()