Load the libraries and read the “World Happiness Report 2021” dataset
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.2 ✓ purrr 0.3.4
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Switch the session's working directory to the Downloads folder, then load
# the report from there into a tibble named WHR.
# NOTE(review): setwd() affects every relative path used later in the session;
# consider passing the full path to read_csv() instead.
setwd("~/Downloads")
WHR <- read_csv(file = "world-happiness-report-2021.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## `Country name` = col_character(),
## `Regional indicator` = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
head(WHR, n = 6L) # preview the first six rows of the data
## # A tibble: 6 x 20
## `Country name` `Regional indica… `Ladder score` `Standard error … upperwhisker
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Finland Western Europe 7.84 0.032 7.90
## 2 Denmark Western Europe 7.62 0.035 7.69
## 3 Switzerland Western Europe 7.57 0.036 7.64
## 4 Iceland Western Europe 7.55 0.059 7.67
## 5 Netherlands Western Europe 7.46 0.027 7.52
## 6 Norway Western Europe 7.39 0.035 7.46
## # … with 15 more variables: lowerwhisker <dbl>, Logged GDP per capita <dbl>,
## # Social.support <dbl>, Healthy life expectancy <dbl>,
## # Freedom to make life choices <dbl>, Generosity <dbl>,
## # Perceptions of corruption <dbl>, Ladder score in Dystopia <dbl>,
## # Explained by: Log GDP per capita <dbl>, Explained by: Social support <dbl>,
## # Explained by: Healthy life expectancy <dbl>,
## # Explained by: Freedom to make life choices <dbl>,
## # Explained by: Generosity <dbl>,
## # Explained by: Perceptions of corruption <dbl>, Dystopia + residual <dbl>
Clean up the data:
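Make all headers lowercase and remove spaces. Other punctuation in the headers (".", ":" and "+") is left in place, so names such as "explainedby:loggdppercapita" and "dystopia+residual" remain.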
# Normalise the column headers in one pass: lowercase every name, then strip
# the literal space characters. Other punctuation (".", ":", "+") is untouched.
names(WHR) <- gsub(" ", "", tolower(names(WHR)), fixed = TRUE)
str(WHR) # inspect the resulting column names and types
## spec_tbl_df [149 × 20] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ countryname : chr [1:149] "Finland" "Denmark" "Switzerland" "Iceland" ...
## $ regionalindicator : chr [1:149] "Western Europe" "Western Europe" "Western Europe" "Western Europe" ...
## $ ladderscore : num [1:149] 7.84 7.62 7.57 7.55 7.46 ...
## $ standarderrorofladderscore : num [1:149] 0.032 0.035 0.036 0.059 0.027 0.035 0.036 0.037 0.04 0.036 ...
## $ upperwhisker : num [1:149] 7.9 7.69 7.64 7.67 7.52 ...
## $ lowerwhisker : num [1:149] 7.78 7.55 7.5 7.44 7.41 ...
## $ loggedgdppercapita : num [1:149] 10.8 10.9 11.1 10.9 10.9 ...
## $ social.support : num [1:149] 0.954 0.954 0.942 0.983 0.942 0.954 0.934 0.908 0.948 0.934 ...
## $ healthylifeexpectancy : num [1:149] 72 72.7 74.4 73 72.4 73.3 72.7 72.6 73.4 73.3 ...
## $ freedomtomakelifechoices : num [1:149] 0.949 0.946 0.919 0.955 0.913 0.96 0.945 0.907 0.929 0.908 ...
## $ generosity : num [1:149] -0.098 0.03 0.025 0.16 0.175 0.093 0.086 -0.034 0.134 0.042 ...
## $ perceptionsofcorruption : num [1:149] 0.186 0.179 0.292 0.673 0.338 0.27 0.237 0.386 0.242 0.481 ...
## $ ladderscoreindystopia : num [1:149] 2.43 2.43 2.43 2.43 2.43 2.43 2.43 2.43 2.43 2.43 ...
## $ explainedby:loggdppercapita : num [1:149] 1.45 1.5 1.57 1.48 1.5 ...
## $ explainedby:socialsupport : num [1:149] 1.11 1.11 1.08 1.17 1.08 ...
## $ explainedby:healthylifeexpectancy : num [1:149] 0.741 0.763 0.816 0.772 0.753 0.782 0.763 0.76 0.785 0.782 ...
## $ explainedby:freedomtomakelifechoices: num [1:149] 0.691 0.686 0.653 0.698 0.647 0.703 0.685 0.639 0.665 0.64 ...
## $ explainedby:generosity : num [1:149] 0.124 0.208 0.204 0.293 0.302 0.249 0.244 0.166 0.276 0.215 ...
## $ explainedby:perceptionsofcorruption : num [1:149] 0.481 0.485 0.413 0.17 0.384 0.427 0.448 0.353 0.445 0.292 ...
## $ dystopia+residual : num [1:149] 3.25 2.87 2.84 2.97 2.8 ...
## - attr(*, "spec")=
## .. cols(
## .. `Country name` = col_character(),
## .. `Regional indicator` = col_character(),
## .. `Ladder score` = col_double(),
## .. `Standard error of ladder score` = col_double(),
## .. upperwhisker = col_double(),
## .. lowerwhisker = col_double(),
## .. `Logged GDP per capita` = col_double(),
## .. Social.support = col_double(),
## .. `Healthy life expectancy` = col_double(),
## .. `Freedom to make life choices` = col_double(),
## .. Generosity = col_double(),
## .. `Perceptions of corruption` = col_double(),
## .. `Ladder score in Dystopia` = col_double(),
## .. `Explained by: Log GDP per capita` = col_double(),
## .. `Explained by: Social support` = col_double(),
## .. `Explained by: Healthy life expectancy` = col_double(),
## .. `Explained by: Freedom to make life choices` = col_double(),
## .. `Explained by: Generosity` = col_double(),
## .. `Explained by: Perceptions of corruption` = col_double(),
## .. `Dystopia + residual` = col_double()
## .. )