library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the World Development Indicators extract and preview its first rows.
# NOTE(review): the path points into one user's ~/Downloads folder, so the
# script only runs as-is on a machine that has the file in that location.
WorldData <- read.csv(
  file = "~/Downloads/P_Data_Extract_From_World_Development_Indicators/8e791ceb-b74b-4bb4-8c36-66fa0b38c4bb_Data.csv"
)
head(WorldData)
## Country.Name Country.Code
## 1 Argentina ARG
## 2 Argentina ARG
## 3 Argentina ARG
## 4 Argentina ARG
## 5 Argentina ARG
## 6 Argentina ARG
## Series.Name
## 1 Adolescent fertility rate (births per 1,000 women ages 15-19)
## 2 Agriculture, forestry, and fishing, value added (% of GDP)
## 3 Annual freshwater withdrawals, total (% of internal resources)
## 4 Births attended by skilled health staff (% of total)
## 5 CO2 emissions (metric tons per capita)
## 6 Contraceptive prevalence, any method (% of married women ages 15-49)
## Series.Code X2014..YR2014. X2015..YR2015. X2016..YR2016.
## 1 SP.ADO.TFRT 67.791 65.395 61.852
## 2 NV.AGR.TOTL.ZS 6.71270351428559 5.1566859021408 6.26456582010254
## 3 ER.H2O.FWTL.ZS 12.9075342465753 12.9075342465753 12.9075342465753
## 4 SH.STA.BRTC.ZS 99.6 99.6 98.4
## 5 EN.ATM.CO2E.PC 4.20911189491323 4.30191380564475 4.20181586904703
## 6 SP.DYN.CONU.ZS .. .. ..
## X2017..YR2017. X2018..YR2018. X2019..YR2019. X2020..YR2020.
## 1 57.783 51.029 46.153 39.866
## 2 5.23162237725058 4.53787889681146 5.3185559967348 6.35703367575527
## 3 12.9075342465753 12.9075342465753 12.9075342465753 12.9075342465753
## 4 93.9 99.5 99.6 98.8
## 5 4.07011168693629 3.97565074444479 3.74202981162433 3.4056175404138
## 6 .. .. .. 70.1
## X2021..YR2021. X2022..YR2022. X2023..YR2023.
## 1 39.065 37.932 ..
## 2 7.30630885522762 6.63989827840203 6.05950876262449
## 3 .. .. ..
## 4 .. .. ..
## 5 .. .. ..
## 6 .. .. ..
# Inspect column types. The year columns are read as character rather than
# numeric; they contain ".." entries alongside the numbers.
str(WorldData)
## 'data.frame': 1085 obs. of 14 variables:
## $ Country.Name : chr "Argentina" "Argentina" "Argentina" "Argentina" ...
## $ Country.Code : chr "ARG" "ARG" "ARG" "ARG" ...
## $ Series.Name : chr "Adolescent fertility rate (births per 1,000 women ages 15-19)" "Agriculture, forestry, and fishing, value added (% of GDP)" "Annual freshwater withdrawals, total (% of internal resources)" "Births attended by skilled health staff (% of total)" ...
## $ Series.Code : chr "SP.ADO.TFRT" "NV.AGR.TOTL.ZS" "ER.H2O.FWTL.ZS" "SH.STA.BRTC.ZS" ...
## $ X2014..YR2014.: chr "67.791" "6.71270351428559" "12.9075342465753" "99.6" ...
## $ X2015..YR2015.: chr "65.395" "5.1566859021408" "12.9075342465753" "99.6" ...
## $ X2016..YR2016.: chr "61.852" "6.26456582010254" "12.9075342465753" "98.4" ...
## $ X2017..YR2017.: chr "57.783" "5.23162237725058" "12.9075342465753" "93.9" ...
## $ X2018..YR2018.: chr "51.029" "4.53787889681146" "12.9075342465753" "99.5" ...
## $ X2019..YR2019.: chr "46.153" "5.3185559967348" "12.9075342465753" "99.6" ...
## $ X2020..YR2020.: chr "39.866" "6.35703367575527" "12.9075342465753" "98.8" ...
## $ X2021..YR2021.: chr "39.065" "7.30630885522762" ".." ".." ...
## $ X2022..YR2022.: chr "37.932" "6.63989827840203" ".." ".." ...
## $ X2023..YR2023.: chr ".." "6.05950876262449" ".." ".." ...
# Summarise every column. All columns are still character at this point, so
# only length/class/mode are reported, not numeric statistics.
summary(WorldData)
## Country.Name Country.Code Series.Name Series.Code
## Length:1085 Length:1085 Length:1085 Length:1085
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X2014..YR2014. X2015..YR2015. X2016..YR2016. X2017..YR2017.
## Length:1085 Length:1085 Length:1085 Length:1085
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X2018..YR2018. X2019..YR2019. X2020..YR2020. X2021..YR2021.
## Length:1085 Length:1085 Length:1085 Length:1085
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X2022..YR2022. X2023..YR2023.
## Length:1085 Length:1085
## Class :character Class :character
## Mode :character Mode :character
# Reshape from one column per year to one row per country/indicator/year.
# Column names such as "X2014..YR2014." are reduced to the bare year by
# removing "X" and everything from the first ".." onward.
tidy_WorldData <- pivot_longer(
  WorldData,
  cols = starts_with("X"),
  names_to = "Year",
  values_to = "Value"
)
tidy_WorldData <- mutate(tidy_WorldData, Year = gsub("X|\\.\\..*", "", Year))
head(tidy_WorldData)
## # A tibble: 6 × 6
## Country.Name Country.Code Series.Name Series.Code Year Value
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2014 67.7…
## 2 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2015 65.3…
## 3 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2016 61.8…
## 4 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2017 57.7…
## 5 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2018 51.0…
## 6 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2019 46.1…
# Average each indicator per country and year.
#
# The extract writes ".." where an observation is missing. Mapping that
# placeholder to NA before as.numeric() avoids an "NAs introduced by coercion"
# warning for every group that contains it. Any other non-numeric text would
# still warn, which is intentional. Groups with no observed value still yield
# NaN, because mean() of an empty vector is NaN.
#
# .groups = "drop_last" spells out summarise()'s default regrouping (the result
# stays grouped by Country.Name and Series.Name), so the informational
# "has grouped output by" message is no longer printed.
#
# NOTE(review): rows whose Country.Name and Series.Name are empty strings are
# still summarised here; consider filtering them out if they are not real data.
World_summary <- tidy_WorldData %>%
  group_by(Country.Name, Series.Name, Year) %>%
  summarize(
    Average_Value = mean(as.numeric(na_if(Value, "..")), na.rm = TRUE),
    .groups = "drop_last"
  )
# Preview the averages. The first rows have empty Country.Name/Series.Name and
# NaN averages -- presumably blank or footer rows from the CSV; verify.
head(World_summary)
## # A tibble: 6 × 4
## # Groups: Country.Name, Series.Name [1]
## Country.Name Series.Name Year Average_Value
## <chr> <chr> <chr> <dbl>
## 1 "" "" 2014 NaN
## 2 "" "" 2015 NaN
## 3 "" "" 2016 NaN
## 4 "" "" 2017 NaN
## 5 "" "" 2018 NaN
## 6 "" "" 2019 NaN
# Convert Year and Value from character to numeric.
#
# ".." marks a missing observation in this extract, so it is mapped to NA
# explicitly instead of letting as.numeric() coerce it with an
# "NAs introduced by coercion" warning. The resulting values are unchanged.
# Any other unparseable text would still raise the warning.
tidy_WorldData <- tidy_WorldData %>%
  mutate(
    Year = as.numeric(Year),
    Value = as.numeric(na_if(Value, ".."))
  )
# Confirm that Year and Value are now numeric (<dbl>) columns.
head(tidy_WorldData)
## # A tibble: 6 × 6
## Country.Name Country.Code Series.Name Series.Code Year Value
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2014 67.8
## 2 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2015 65.4
## 3 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2016 61.9
## 4 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2017 57.8
## 5 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2018 51.0
## 6 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2019 46.2
# Summarise the converted data. Value mixes indicators with very different
# scales, so its overall statistics are not meaningful on their own; the NA
# count shows how many observations are missing.
summary(tidy_WorldData)
## Country.Name Country.Code Series.Name Series.Code
## Length:10850 Length:10850 Length:10850 Length:10850
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Year Value
## Min. :2014 Min. :-3.090e+11
## 1st Qu.:2016 1st Qu.: 6.000e+00
## Median :2018 Median : 3.300e+01
## Mean :2018 Mean : 2.981e+11
## 3rd Qu.:2021 3rd Qu.: 1.208e+04
## Max. :2023 Max. : 3.439e+13
## NA's :2597
# Keep only the observations that have a value, then preview the result.
tidy_WorldData <- drop_na(tidy_WorldData, Value)
head(tidy_WorldData)
## # A tibble: 6 × 6
## Country.Name Country.Code Series.Name Series.Code Year Value
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2014 67.8
## 2 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2015 65.4
## 3 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2016 61.9
## 4 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2017 57.8
## 5 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2018 51.0
## 6 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2019 46.2
# Extract the adolescent fertility rate series for all countries and years,
# then preview the first rows.
fertility_data <- filter(
  tidy_WorldData,
  Series.Name == "Adolescent fertility rate (births per 1,000 women ages 15-19)"
)
head(fertility_data)
## # A tibble: 6 × 6
## Country.Name Country.Code Series.Name Series.Code Year Value
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2014 67.8
## 2 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2015 65.4
## 3 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2016 61.9
## 4 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2017 57.8
## 5 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2018 51.0
## 6 Argentina ARG Adolescent fertility rate (… SP.ADO.TFRT 2019 46.2