library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(gapminder)
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 0.2.0 ──
## ✓ broom        0.7.12     ✓ rsample      0.1.1 
## ✓ dials        0.1.0      ✓ tune         0.2.0 
## ✓ infer        1.0.0      ✓ workflows    0.2.6 
## ✓ modeldata    0.1.1      ✓ workflowsets 0.2.1 
## ✓ parsnip      0.2.1      ✓ yardstick    0.0.9 
## ✓ recipes      0.2.0
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## x scales::discard() masks purrr::discard()
## x dplyr::filter()   masks stats::filter()
## x recipes::fixed()  masks stringr::fixed()
## x dplyr::lag()      masks stats::lag()
## x yardstick::spec() masks readr::spec()
## x recipes::step()   masks stats::step()
## • Use suppressPackageStartupMessages() to eliminate package startup messages
# Print a compact, column-wise overview of the gapminder data,
# truncated to 50 characters per line.
gapminder %>%
  glimpse(width = 50)
## Rows: 1,704
## Columns: 6
## $ country   <fct> "Afghanistan", "Afghanistan", …
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, …
## $ year      <int> 1952, 1957, 1962, 1967, 1972, …
## $ lifeExp   <dbl> 28.801, 30.332, 31.997, 34.020…
## $ pop       <int> 8425333, 9240934, 10267083, 11…
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, …
# Keep only the 1982 observations.
nineteeneightytwo <- filter(gapminder, year == 1982)

# Open the subset in the data viewer for manual inspection.
view(nineteeneightytwo)

# Scatter plot of GDP per capita (y) against population (x) for 1982.
ggplot(nineteeneightytwo, aes(pop, gdpPercap)) +
  geom_point()

# Reduce the 1982 data to the two variables being compared.
twolevel <- select(nineteeneightytwo, pop, gdpPercap)

# Test the correlation between population and GDP per capita
# (cor.test defaults: Pearson, two-sided).
cor.test(twolevel$pop, twolevel$gdpPercap)
## 
##  Pearson's product-moment correlation
## 
## data:  twolevel$pop and twolevel$gdpPercap
## t = -0.71053, df = 140, p-value = 0.4786
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2224733  0.1058295
## sample estimates:
##         cor 
## -0.05994275

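There appears to be only a very weak negative correlation (r ≈ -0.06) between a country’s population and its GDP per capita, and it is not statistically significant (t = -0.71, df = 140, p = 0.48; the 95% confidence interval of -0.22 to 0.11 includes zero). The estimate may also be skewed by two extreme outliers, India and China, with a very high population and very low GDP per capita.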

# Two samples to compare: `u` contains large outliers, `f` does not.
u <- c(
  6, 5, 11, 33, 4, 5,
  80, 18, 35, 17, 23
)
f <- c(
  4, 14, 11, 9, 9, 8, 4, 20,
  5, 8.9, 21, 9.2, 3, 2, 0.3
)

# Means first, then medians, for each sample in turn.
mean(u)
## [1] 21.54545
mean(f)
## [1] 8.56
median(u)
## [1] 17
median(f)
## [1] 8.9

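The set “u” has both a greater mean and a greater median than the set “f”, but the mean is greater by a much larger amount (about 13.0, versus 8.1 for the median). This is because there are large outliers in the u set, most notably 80, that pull the mean up, whereas the median depends only on the middle of the ordered values and is therefore barely affected by them.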

# Reported blood pressure readings and their median.
bp <- c(
  118.6, 127.4, 138.4,
  130, 113.7, 122,
  108.3, 131.5, 133.2
)
median(bp)
## [1] 127.4

# Same readings with only the second value changed (127.4 -> 127.6).
new_bp <- replace(bp, 2, 127.6)
median(new_bp)
## [1] 127.6

The median of the reported blood pressure values is 127.4. Changing the second value to 127.6 also changes the median to 127.6, since that value was the previous median and it remains the middle value after the change. Changing an individual value in a data set has no effect on the median as long as that value stays on the same side of the median; the median changes only when the value that was changed was the previous median, or when the change moves it onto or across the middle position of the ordered data, in which case we can see an effect even with minuscule changes.