library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(moderndive)
library(skimr)
library(ISLR)
I
# Four (x, y) observations entered row by row.
# Every point satisfies y = 3 * x + 1, so the relationship is exactly linear.
df1 <- tribble(
  ~x, ~y,
   1,  4,
   3, 10,
   5, 16,
  10, 31
)
a) variance of x and y
# Population variance of x and y via the identity
#   Var(X) = E[X^2] - (E[X])^2
# The mean has to be squared before it is subtracted; subtracting the plain
# mean (E[X^2] - E[X]) does not yield a variance.
# This divides by n (population form), unlike stats::var() which uses n - 1.
dfvar <- df1 %>%
  summarise(
    var_x = mean(x^2) - mean(x)^2,
    var_y = mean(y^2) - mean(y)^2
  )
dfvar
b) std of x and y
# Population standard deviation of x and y: the square root of
#   Var(X) = E[X^2] - (E[X])^2
# The mean must be squared inside the variance term; using E[X^2] - E[X]
# inflates the result and breaks any correlation derived from it.
# This is the divide-by-n form, unlike stats::sd() which uses n - 1.
dfstd <- df1 %>%
  summarise(
    std_x = sqrt(mean(x^2) - mean(x)^2),
    std_y = sqrt(mean(y^2) - mean(y)^2)
  )
dfstd
c) correlation of x and y
# Population covariance of x and y via the identity
#   Cov(X, Y) = E[XY] - E[X] * E[Y]
# No squared columns are needed for this quantity, so none are created.
dfcov <- df1 %>%
  summarise(cov_xy = mean(x * y) - mean(x) * mean(y))
dfcov
# Correlation = covariance divided by the product of the two standard
# deviations. dfcov and dfstd each hold a single summary row, so binding
# their columns gives one row containing cov_xy, std_x and std_y.
bind_cols(dfcov, dfstd) %>%
  summarise(cor_xy = cov_xy / (std_x * std_y))
d) correlation of x and y (with libraries)
# Cross-check the hand-computed correlation with moderndive's helper.
get_correlation(df1, formula = y ~ x)
II
# Four (x, y) observations entered row by row.
# Each y value equals 10 - x^2, i.e. the points sit on a parabola.
df2 <- tribble(
  ~x,  ~y,
  -5, -15,
  -2,   6,
   3,   1,
   7, -39
)
e) find linear regression of y on x
# Fit the simple linear regression of y on x, then tabulate its coefficients.
lm(y ~ x, data = df2) %>%
  get_regression_table()
f) graph parabola points + line
# Scatter the df2 points, then overlay two curves:
#   - the least-squares line fitted by lm (confidence band switched off)
#   - the curve y = 10 - x^2 drawn directly from its formula
ggplot(data = df2, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE) +
  geom_function(fun = function(x) 10 - x^2)
## `geom_smooth()` using formula 'y ~ x'
