Lets Import the useful libraries & the dataset
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 0.1.3 ──
## ✓ broom 0.7.6 ✓ recipes 0.1.16
## ✓ dials 0.0.9 ✓ rsample 0.1.0
## ✓ dplyr 1.0.6 ✓ tibble 3.1.2
## ✓ ggplot2 3.3.3 ✓ tidyr 1.1.3
## ✓ infer 0.5.4 ✓ tune 0.1.5
## ✓ modeldata 0.1.0 ✓ workflows 0.2.2
## ✓ parsnip 0.1.5 ✓ workflowsets 0.0.2
## ✓ purrr 0.3.4 ✓ yardstick 0.0.8
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## x purrr::discard() masks scales::discard()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x recipes::step() masks stats::step()
## • Use tidymodels_prefer() to resolve common conflicts.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ✓ stringr 1.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x readr::col_factor() masks scales::col_factor()
## x purrr::discard() masks scales::discard()
## x dplyr::filter() masks stats::filter()
## x stringr::fixed() masks recipes::fixed()
## x dplyr::lag() masks stats::lag()
## x readr::spec() masks yardstick::spec()
data("Orange")
orange <- as_tibble(Orange)
glimpse(orange)
## Rows: 35
## Columns: 3
## $ Tree <ord> 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,…
## $ age <dbl> 118, 484, 664, 1004, 1231, 1372, 1582, 118, 484, 664, 10…
## $ circumference <dbl> 30, 58, 87, 115, 120, 142, 145, 33, 69, 111, 156, 172, 2…
cor(orange$age,orange$circumference)
## [1] 0.9135189
ggplot(orange, aes(age,circumference, color=Tree)) +
geom_line() +theme_bw()
Test for correlations individually within each tree
orange %>% group_by(Tree) %>% summarize(correlation = cor(age,circumference))
instead of simply estimating a correlation, we want to perform a hypothesis test
ct <- cor.test(orange$age,orange$circumference)
ct
##
## Pearson's product-moment correlation
##
## data: orange$age and orange$circumference
## t = 12.9, df = 33, p-value = 1.931e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8342364 0.9557955
## sample estimates:
## cor
## 0.9135189
tidy(ct)
nested <- orange %>% nest(data=c(age,circumference))
nested
nested %>% mutate(test = map(data, ~ cor.test(.x$age, .x$circumference)),tidied = map(test,tidy))
orange %>%
nest(data = c(age, circumference)) %>%
mutate(
test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col
tidied = map(test, tidy)
) %>%
unnest(cols = tidied) %>%
select(-data, -test)
Regression Model
lm_fit <- lm(age ~ circumference, data = orange)
summary(lm_fit)
##
## Call:
## lm(formula = age ~ circumference, data = orange)
##
## Residuals:
## Min 1Q Median 3Q Max
## -317.88 -140.90 -17.20 96.54 471.16
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.6036 78.1406 0.212 0.833
## circumference 7.8160 0.6059 12.900 1.93e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 203.1 on 33 degrees of freedom
## Multiple R-squared: 0.8345, Adjusted R-squared: 0.8295
## F-statistic: 166.4 on 1 and 33 DF, p-value: 1.931e-14
tidy(lm_fit)
orange %>%
nest(data = c(-Tree)) %>%
mutate(
fit = map(data, ~ lm(age ~ circumference, data = .x)),
tidied = map(fit, tidy)
) %>%
unnest(tidied) %>%
select(-data, -fit)
mtcars <- as_tibble(mtcars)
mtcars
mtcars %>%
nest(data = c(-am)) %>%
mutate(
fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)), # S3 list-col
tidied = map(fit, tidy)
) %>%
unnest(tidied) %>%
select(-data, -fit)
regressions <- mtcars %>%
nest(data = c(-am)) %>%
mutate(
fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)),
tidied = map(fit, tidy),
glanced = map(fit, glance),
augmented = map(fit, augment)
)
regressions %>%
select(tidied) %>%
unnest(tidied)
regressions %>%
select(glanced) %>%
unnest(glanced)
regressions %>%
select(augmented) %>%
unnest(augmented)