library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(moderndive)
library(skimr)
library(gapminder)
# Keep only the ID, score, bty_avg and age columns of `evals`.
evals_ch5 <- select(evals, ID, score, bty_avg, age)

# Summarise the age and score columns with skim().
evals_ch5 %>%
  select(age, score) %>%
  skim()
| Name | Piped data |
| Number of rows | 463 |
| Number of columns | 2 |
| _______________________ | |
| Column type frequency: | |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age | 0 | 1 | 48.37 | 9.80 | 29.0 | 42.0 | 48.0 | 57.0 | 73 | ▅▆▇▆▁ |
| score | 0 | 1 | 4.17 | 0.54 | 2.3 | 3.8 | 4.3 | 4.6 | 5 | ▁▁▅▇▇ |
# Correlation between score and age in evals_ch5.
get_correlation(evals_ch5, formula = score ~ age)
# Jittered scatterplot of score against age, overlaid with a straight
# line fitted by lm (method = "lm") and no confidence band (se = FALSE).
evals_ch5 %>%
  ggplot(mapping = aes(x = age, y = score)) +
  geom_jitter() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
Score is negatively correlated with age.
# Fit the linear regression of score on age and show its coefficient table.
get_regression_table(
  model = lm(formula = score ~ age, data = evals_ch5)
)
This slope and intercept are the same as those of the line drawn by `geom_smooth()`.
# Per-observation regression points for the score ~ age model. The result
# is stored so the model is fitted once instead of once per statement.
score_points <- get_regression_points(lm(score ~ age, data = evals_ch5))
score_points

# Sum of squared residuals (ssr) and mean squared residual (mse).
# `^` is R's power operator; `**` is only a parser alias for it.
score_points %>%
  summarise(ssr = sum(residual^2), mse = mean(residual^2))
# Histograms of gdpPercap for the year 2007, one panel per continent.
ggplot(data = filter(gapminder, year == 2007)) +
  geom_histogram(mapping = aes(x = gdpPercap)) +
  facet_wrap(~ continent)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Boxplots of gdpPercap for the year 2007, one box per continent.
ggplot(
  data = filter(gapminder, year == 2007),
  mapping = aes(x = continent, y = gdpPercap)
) +
  geom_boxplot()
# Median and mean gdpPercap per continent for the year 2007.
gapminder %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(
    median = median(gdpPercap),
    mean = mean(gdpPercap)
  )
Africa has by far the lowest gdpPercap. Asia has the most spread.
# Regress 2007 gdpPercap on continent and show the coefficient table.
get_regression_table(
  model = lm(
    formula = gdpPercap ~ continent,
    data = gapminder %>% filter(year == 2007)
  )
)
The intercept is the baseline for Africa and equals the Africa mean gdpPercap we found earlier. In fact, each of the other estimates added to the Africa intercept equals that continent's mean from earlier. Not that useful, though.
# Regress 2007 lifeExp on continent, label rows by country, and show the
# first rows after sorting by residual in ascending order.
get_regression_points(
  model = lm(
    formula = lifeExp ~ continent,
    data = gapminder %>% filter(year == 2007)
  ),
  ID = "country"
) %>%
  arrange(residual) %>%
  head()
These countries' life expectancies are the farthest below the average for their continent.
# Regress 2007 lifeExp on continent, label rows by country, and show the
# first rows after sorting by residual in descending order.
get_regression_points(
  model = lm(
    formula = lifeExp ~ continent,
    data = gapminder %>% filter(year == 2007)
  ),
  ID = "country"
) %>%
  arrange(desc(residual)) %>%
  head()
These countries' life expectancies are the farthest above the average for their continent.
Red: residuals -0.5, -1.5, 0.5 -> SSR = 2.75; Green: residuals 0, -1.5, 2 -> SSR = 6.25; Blue: residuals 0.5, -1, 0.5 -> SSR = 1.5. The blue line has the smallest SSR of the three.