suppressMessages(library(tidyverse))
## Warning: package 'tidyverse' was built under R version 4.0.5
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.4
## Warning: package 'tidyr' was built under R version 4.0.4
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.4
## Warning: package 'forcats' was built under R version 4.0.5
theme_set(theme_light())
suppressMessages(brewing_materials_raw <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-31/brewing_materials.csv"))
brewing_materials_raw %>%
count(type, wt = month_current, sort = TRUE)
## # A tibble: 12 x 2
## type n
## <chr> <dbl>
## 1 Total Used 53559516695
## 2 Total Grain products 44734903124
## 3 Malt and malt products 32697313882
## 4 Total Non-Grain products 8824613571
## 5 Sugar and syrups 6653104081
## 6 Rice and rice products 5685742541
## 7 Corn and corn products 5207759409
## 8 Hops (dry) 1138840132
## 9 Other 998968470
## 10 Barley and barley products 941444745
## 11 Wheat and wheat products 202642547
## 12 Hops (used as extracts) 33700888
brewing_filtered <- brewing_materials_raw %>%
filter(
type %in% c(
"Malt and malt products",
"Sugar and syrups",
"Hops (dry)"
),
year < 2016,
!(month == 12 & year %in% 2014:2015)
) %>%
mutate(
date = paste0(year, "-", month, "-01"),
date = lubridate::ymd(date)
)
brewing_filtered %>%
ggplot(aes(date, month_current, color = type)) +
geom_point()

brewing_materials <- brewing_filtered %>%
select(date, type, month_current) %>%
pivot_wider(
names_from = type,
values_from = month_current
) %>%
janitor::clean_names()
brewing_materials %>%
ggplot(aes(malt_and_malt_products, sugar_and_syrups)) +
geom_smooth(method = "lm") +
geom_point()
## `geom_smooth()` using formula 'y ~ x'

library(tidymodels)
## Warning: package 'tidymodels' was built under R version 4.0.4
## -- Attaching packages -------------------------------------- tidymodels 0.1.2 --
## v broom 0.7.7 v recipes 0.1.15
## v dials 0.0.9 v rsample 0.0.8
## v infer 0.5.3 v tune 0.1.2
## v modeldata 0.1.0 v workflows 0.2.1
## v parsnip 0.1.5 v yardstick 0.0.7
## Warning: package 'dials' was built under R version 4.0.3
## Warning: package 'modeldata' was built under R version 4.0.4
## Warning: package 'parsnip' was built under R version 4.0.3
## Warning: package 'recipes' was built under R version 4.0.3
## Warning: package 'rsample' was built under R version 4.0.3
## Warning: package 'tune' was built under R version 4.0.3
## Warning: package 'workflows' was built under R version 4.0.3
## -- Conflicts ----------------------------------------- tidymodels_conflicts() --
## x scales::discard() masks purrr::discard()
## x dplyr::filter() masks stats::filter()
## x recipes::fixed() masks stringr::fixed()
## x dplyr::lag() masks stats::lag()
## x yardstick::spec() masks readr::spec()
## x recipes::step() masks stats::step()
beer_fit <- lm(sugar_and_syrups ~ 0 + malt_and_malt_products,
data = brewing_materials
)
summary(beer_fit)
##
## Call:
## lm(formula = sugar_and_syrups ~ 0 + malt_and_malt_products, data = brewing_materials)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29985291 -6468052 174001 7364462 23462837
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## malt_and_malt_products 0.205804 0.003446 59.72 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11480000 on 93 degrees of freedom
## Multiple R-squared: 0.9746, Adjusted R-squared: 0.9743
## F-statistic: 3567 on 1 and 93 DF, p-value: < 2.2e-16
tidy(beer_fit)
## # A tibble: 1 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 malt_and_malt_products 0.206 0.00345 59.7 5.72e-76
set.seed(123)
beer_boot <- bootstraps(brewing_materials, times = 1e3, apparent = TRUE)
beer_models <- beer_boot %>% mutate(
model = map(splits, ~lm(sugar_and_syrups ~ 0 + malt_and_malt_products,
data = .)),
coef_info = map(model, tidy)
)
beer_coefs <- beer_models %>%
unnest(coef_info)
beer_coefs %>%
ggplot(aes(estimate)) +
geom_histogram(alpha = 0.7, fill = "cyan3")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

int_pctl(beer_models, coef_info)
## # A tibble: 1 x 6
## term .lower .estimate .upper .alpha .method
## <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 malt_and_malt_products 0.199 0.206 0.212 0.05 percentile
beer_aug <- beer_models %>%
sample_n(200) %>%
mutate(augmented = map(model, augment))%>%
unnest(augmented)
beer_aug
## # A tibble: 18,800 x 12
## splits id model coef_info sugar_and_syrups malt_and_malt_pr~ .fitted
## <list> <chr> <lis> <list> <dbl> <dbl> <dbl>
## 1 <rsplit ~ Bootst~ <lm> <tibble [~ 71341108 384396702 7.78e7
## 2 <rsplit ~ Bootst~ <lm> <tibble [~ 76728131 405393784 8.20e7
## 3 <rsplit ~ Bootst~ <lm> <tibble [~ 73793509 322480722 6.53e7
## 4 <rsplit ~ Bootst~ <lm> <tibble [~ 85703037 340319408 6.89e7
## 5 <rsplit ~ Bootst~ <lm> <tibble [~ 67266337 380521275 7.70e7
## 6 <rsplit ~ Bootst~ <lm> <tibble [~ 81199989 388639443 7.86e7
## 7 <rsplit ~ Bootst~ <lm> <tibble [~ 76115769 399504457 8.08e7
## 8 <rsplit ~ Bootst~ <lm> <tibble [~ 66002563 321371392 6.50e7
## 9 <rsplit ~ Bootst~ <lm> <tibble [~ 85703037 340319408 6.89e7
## 10 <rsplit ~ Bootst~ <lm> <tibble [~ 74805384 351222725 7.11e7
## # ... with 18,790 more rows, and 5 more variables: .resid <dbl>, .hat <dbl>,
## # .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>
ggplot(beer_aug, aes(malt_and_malt_products, sugar_and_syrups)) +
geom_line(aes(y = .fitted, group = id), alpha = .2, col = "cyan3") +
geom_point()
