library(tidyverse)
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr 1.1.4 ✔ readr 2.1.5
#> ✔ forcats 1.0.1 ✔ stringr 1.6.0
#> ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
#> ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
#> ✔ purrr 1.2.0
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the data
df <- read_csv("Life Expectancy Data.csv")
#> Rows: 2938 Columns: 22
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (2): Country, Status
#> dbl (20): Year, Life expectancy, Adult Mortality, infant deaths, Alcohol, pe...
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Check what we have
glimpse(df)
#> Rows: 2,938
#> Columns: 22
#> $ Country <chr> "Afghanistan", "Afghanistan", "Afgha…
#> $ Year <dbl> 2015, 2014, 2013, 2012, 2011, 2010, …
#> $ Status <chr> "Developing", "Developing", "Develop…
#> $ `Life expectancy` <dbl> 65.0, 59.9, 59.9, 59.5, 59.2, 58.8, …
#> $ `Adult Mortality` <dbl> 263, 271, 268, 272, 275, 279, 281, 2…
#> $ `infant deaths` <dbl> 62, 64, 66, 69, 71, 74, 77, 80, 82, …
#> $ Alcohol <dbl> 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, …
#> $ `percentage expenditure` <dbl> 71.279624, 73.523582, 73.219243, 78.…
#> $ `Hepatitis B` <dbl> 65, 62, 64, 67, 68, 66, 63, 64, 63, …
#> $ Measles <dbl> 1154, 492, 430, 2787, 3013, 1989, 28…
#> $ BMI <dbl> 19.1, 18.6, 18.1, 17.6, 17.2, 16.7, …
#> $ `under-five deaths` <dbl> 83, 86, 89, 93, 97, 102, 106, 110, 1…
#> $ Polio <dbl> 6, 58, 62, 67, 68, 66, 63, 64, 63, 5…
#> $ `Total expenditure` <dbl> 8.16, 8.18, 8.13, 8.52, 7.87, 9.20, …
#> $ Diphtheria <dbl> 65, 62, 64, 67, 68, 66, 63, 64, 63, …
#> $ `HIV/AIDS` <dbl> 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0…
#> $ GDP <dbl> 584.25921, 612.69651, 631.74498, 669…
#> $ Population <dbl> 33736494, 327582, 31731688, 3696958,…
#> $ `thinness 1-19 years` <dbl> 17.2, 17.5, 17.7, 17.9, 18.2, 18.4, …
#> $ `thinness 5-9 years` <dbl> 17.3, 17.5, 17.7, 18.0, 18.2, 18.4, …
#> $ `Income composition of resources` <dbl> 0.479, 0.476, 0.470, 0.463, 0.454, 0…
#> $ Schooling <dbl> 10.1, 10.0, 9.9, 9.8, 9.5, 9.2, 8.9,…
colnames(df)
#> [1] "Country" "Year"
#> [3] "Status" "Life expectancy"
#> [5] "Adult Mortality" "infant deaths"
#> [7] "Alcohol" "percentage expenditure"
#> [9] "Hepatitis B" "Measles"
#> [11] "BMI" "under-five deaths"
#> [13] "Polio" "Total expenditure"
#> [15] "Diphtheria" "HIV/AIDS"
#> [17] "GDP" "Population"
#> [19] "thinness 1-19 years" "thinness 5-9 years"
#> [21] "Income composition of resources" "Schooling"
# Give the columns whose CSV headers need backticks (spaces, "/", "-")
# plain snake_case names. Pairs are listed in the order the columns appear
# in the data; columns not named here keep their original names.
df <- rename(
  df,
  life_expectancy = `Life expectancy`,
  adult_mortality = `Adult Mortality`,
  infant_deaths = `infant deaths`,
  pct_expenditure = `percentage expenditure`,
  hepatitis_b = `Hepatitis B`,
  under_five_deaths = `under-five deaths`,
  total_expenditure = `Total expenditure`,
  hiv_aids = `HIV/AIDS`,
  thinness_1_19 = `thinness 1-19 years`,
  thinness_5_9 = `thinness 5-9 years`,
  income_composition = `Income composition of resources`
)

# Keep only the rows where life_expectancy is not NA.
df <- filter(df, !is.na(life_expectancy))