library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(moderndive)
# Toy data set: two numeric columns `a` and `b` with six observations each,
# stored as a tibble and printed.
df <- tibble(
  a = c(0, 2, 4, 6, 7, 8),
  b = c(10, 3, 8, 9, 4, 1)
)
df
# Population variance of each column via the moment identity
# var(x) = E[x^2] - E[x]^2 (divides by n, unlike stats::var which uses n - 1).
# Result is a one-row tibble with columns `var_a` and `var_b`.
dfvar <- df %>%
  summarise(
    var_a = mean(a^2) - mean(a)^2,
    var_b = mean(b^2) - mean(b)^2
  )
dfvar
# Population covariance of `a` and `b` via cov(a, b) = E[a*b] - E[a]*E[b].
# Result is a one-row tibble with the single column `cov_ab`.
dfcov <- df %>%
  summarise(cov_ab = mean(a * b) - (mean(a) * mean(b)))
dfcov
# Correlation = covariance divided by the product of the standard deviations.
# `dfvar` and `dfcov` are both one-row tibbles, so they are placed side by side
# and reduced to a single `cor_ab` value.
bind_cols(dfvar, dfcov) %>%
  summarise(cor_ab = cov_ab / (sqrt(var_a) * sqrt(var_b)))
cor(a,b) and cor(b,a) have the same value
and the same sign: correlation is symmetric, because cov(a,b) = cov(b,a) and std_a*std_b = std_b*std_a.
2b.
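var(x) = E[x*x] - E[x]*E[x] and
cov(x,y) = E[x*y] - E[x]*E[y], so variance is the special case of covariance
with y = x, i.e. var(x) = cov(x,x): replacing the second x in each product with y turns the variance formula into the covariance formula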
cov(m, 3n) = 3*cov(m,n) = 3*16 = 48
cov(m+n, m) = cov(m,n) + cov(m,m) = 16 + 25 = 41
var(m+n) = cov(m+n, m+n) = cov(m, m+n) + cov(n, m+n) = cov(m, m) + cov(m, n) + cov(n, m) + cov(n, n) = 25 + 16 + 16 + 9 = 66
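cor(m,n) = cov(m,n)/(sqrt(cov(m,m))*sqrt(cov(n,n))) = 16/(5*3) = 1.0666... (a value above 1 is impossible for a correlation, so the given covariance and variances are not mutually consistent)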
# Simulated data: 30 draws of x from Uniform(0, 10) and the exact quadratic
# y = x^2 - 3x + 5 (no noise term). No seed is set, so x differs on every run.
df <- tibble(
  x = runif(30, min = 0, max = 10),
  y = x^2 - 3 * x + 5
)
head(df)
# Scatter plot of the quadratic data with a straight least-squares line
# overlaid (no confidence band).
df %>%
  ggplot(aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear regression y ~ x and print its coefficient table.
lm(y ~ x, data = df) %>%
  get_regression_table()
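Fitted regression line from the table above: y_hat = 6.956*x - 10.165 (these coefficients come from one particular run; x is sampled randomly, so they change each time the code is re-run)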
# Add the fitted value of the linear regression y ~ x as column `y_hat`.
#
# The coefficients are taken from the model itself rather than copied from a
# printed regression table: x is drawn with runif() and no seed, so numbers
# transcribed from one run (and rounded to three decimals) do not match the
# fit on any later run.
fit_coefs <- coef(lm(y ~ x, data = df))
df <- df %>%
  mutate(y_hat = fit_coefs[["(Intercept)"]] + fit_coefs[["x"]] * x)
head(df)
# Same scatter plot and least-squares line as before, plus a red vertical
# segment from each observed point (x, y) to its stored prediction (x, y_hat),
# i.e. the residual of that observation.
# `size` (not `linewidth`) is kept to match the ggplot2 3.3.x this file loads.
df %>%
  ggplot(aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_segment(aes(xend = x, yend = y_hat), color = "red", size = 1.5)
## `geom_smooth()` using formula 'y ~ x'
# Extrapolation check: add two points outside the sampled range of x
# (x = -10 and x = 20) that lie on the true curve y = x^2 - 3x + 5, and draw
# the residual of every point against the line fitted to the original data.
#
# Predictions are computed from the fitted model instead of hard-coded
# coefficients: x is random and unseeded, so numbers copied from an earlier
# run would not describe the current fit.
line_coefs <- coef(lm(y ~ x, data = df))
outside_x <- c(-10, 20)

df %>%
  add_row(x = outside_x, y = outside_x^2 - 3 * outside_x + 5) %>%
  # Recompute y_hat for all rows so old and new points use the same line.
  mutate(y_hat = line_coefs[["(Intercept)"]] + line_coefs[["x"]] * x) %>%
  ggplot(aes(x, y)) +
  geom_point() +
  # NOTE: geom_smooth() refits on the extended data (including the two added
  # points), so the drawn line differs from the fit that the red segments use.
  geom_smooth(method = lm, se = FALSE) +
  geom_segment(aes(xend = x, yend = y_hat), color = "red", size = 1)
## `geom_smooth()` using formula 'y ~ x'