library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(moderndive)
library(skimr)
library(ISLR)

I

# Data set for part I: four (x, y) observations, stored column-wise.
df1 <- tibble(
  x = c(1, 3, 5, 10),
  y = c(4, 10, 16, 31)
)

a) variance of x and y

# Population variance via the shortcut identity Var(X) = E[X^2] - (E[X])^2.
# The mean has to be squared before it is subtracted; E[X^2] - E[X] is not a
# variance. Note this divides by n, so it differs from stats::var(), which
# divides by n - 1.
dfvar <- df1 %>%
  summarise(
    var_x = mean(x^2) - mean(x)^2,
    var_y = mean(y^2) - mean(y)^2
  )
dfvar

b) standard deviation of x and y

# Population standard deviation: the square root of the population variance
# E[X^2] - (E[X])^2. The mean must be squared inside the variance term.
dfstd <- df1 %>%
  summarise(
    std_x = sqrt(mean(x^2) - mean(x)^2),
    std_y = sqrt(mean(y^2) - mean(y)^2)
  )
dfstd

c) correlation of x and y

# Population covariance: Cov(X, Y) = E[XY] - E[X] * E[Y].
dfcov <- df1 %>%
  summarise(cov_xy = mean(x * y) - mean(x) * mean(y))
dfcov

# Pearson correlation: Cov(X, Y) / (sd(X) * sd(Y)).
# The standard deviations are computed here, from E[X^2] - (E[X])^2, so the
# result does not depend on values produced by any other chunk.
# Sanity check: every row of df1 satisfies y = 3x + 1, so the correlation
# should come out as 1 (up to floating-point rounding).
dfcov %>%
  bind_cols(
    summarise(
      df1,
      std_x = sqrt(mean(x^2) - mean(x)^2),
      std_y = sqrt(mean(y^2) - mean(y)^2)
    )
  ) %>%
  summarise(cor_xy = cov_xy / (std_x * std_y))

d) correlation of x and y (with libraries)

# Same correlation, obtained from the moderndive helper for comparison.
get_correlation(df1, formula = y ~ x)

II

# Data set for part II: four (x, y) observations, stored column-wise.
df2 <- tibble(
  x = c(-5, -2, 3, 7),
  y = c(-15, 6, 1, -39)
)

e) find linear regression of y on x

# Fit the least-squares line of y on x and print its coefficient table.
lm(y ~ x, data = df2) %>%
  get_regression_table()

f) graph the parabola, the data points, and the regression line

# Scatter plot of df2 overlaid with the fitted regression line (no confidence
# band) and the curve y = 10 - x^2.
ggplot(data = df2, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_function(fun = function(x) 10 - x^2)
## `geom_smooth()` using formula 'y ~ x'