library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the World Development Indicators CSV export into a data frame and
# preview its first rows.
WorldData <- read.csv(
  file = "~/Downloads/P_Data_Extract_From_World_Development_Indicators/8e791ceb-b74b-4bb4-8c36-66fa0b38c4bb_Data.csv"
)
head(WorldData, n = 6L)
##   Country.Name Country.Code
## 1    Argentina          ARG
## 2    Argentina          ARG
## 3    Argentina          ARG
## 4    Argentina          ARG
## 5    Argentina          ARG
## 6    Argentina          ARG
##                                                            Series.Name
## 1        Adolescent fertility rate (births per 1,000 women ages 15-19)
## 2           Agriculture, forestry, and fishing, value added (% of GDP)
## 3       Annual freshwater withdrawals, total (% of internal resources)
## 4                 Births attended by skilled health staff (% of total)
## 5                               CO2 emissions (metric tons per capita)
## 6 Contraceptive prevalence, any method (% of married women ages 15-49)
##      Series.Code   X2014..YR2014.   X2015..YR2015.   X2016..YR2016.
## 1    SP.ADO.TFRT           67.791           65.395           61.852
## 2 NV.AGR.TOTL.ZS 6.71270351428559  5.1566859021408 6.26456582010254
## 3 ER.H2O.FWTL.ZS 12.9075342465753 12.9075342465753 12.9075342465753
## 4 SH.STA.BRTC.ZS             99.6             99.6             98.4
## 5 EN.ATM.CO2E.PC 4.20911189491323 4.30191380564475 4.20181586904703
## 6 SP.DYN.CONU.ZS               ..               ..               ..
##     X2017..YR2017.   X2018..YR2018.   X2019..YR2019.   X2020..YR2020.
## 1           57.783           51.029           46.153           39.866
## 2 5.23162237725058 4.53787889681146  5.3185559967348 6.35703367575527
## 3 12.9075342465753 12.9075342465753 12.9075342465753 12.9075342465753
## 4             93.9             99.5             99.6             98.8
## 5 4.07011168693629 3.97565074444479 3.74202981162433  3.4056175404138
## 6               ..               ..               ..             70.1
##     X2021..YR2021.   X2022..YR2022.   X2023..YR2023.
## 1           39.065           37.932               ..
## 2 7.30630885522762 6.63989827840203 6.05950876262449
## 3               ..               ..               ..
## 4               ..               ..               ..
## 5               ..               ..               ..
## 6               ..               ..               ..
# Show the structure of the raw import: dimensions plus each column's type.
str(WorldData)
## 'data.frame':    1085 obs. of  14 variables:
##  $ Country.Name  : chr  "Argentina" "Argentina" "Argentina" "Argentina" ...
##  $ Country.Code  : chr  "ARG" "ARG" "ARG" "ARG" ...
##  $ Series.Name   : chr  "Adolescent fertility rate (births per 1,000 women ages 15-19)" "Agriculture, forestry, and fishing, value added (% of GDP)" "Annual freshwater withdrawals, total (% of internal resources)" "Births attended by skilled health staff (% of total)" ...
##  $ Series.Code   : chr  "SP.ADO.TFRT" "NV.AGR.TOTL.ZS" "ER.H2O.FWTL.ZS" "SH.STA.BRTC.ZS" ...
##  $ X2014..YR2014.: chr  "67.791" "6.71270351428559" "12.9075342465753" "99.6" ...
##  $ X2015..YR2015.: chr  "65.395" "5.1566859021408" "12.9075342465753" "99.6" ...
##  $ X2016..YR2016.: chr  "61.852" "6.26456582010254" "12.9075342465753" "98.4" ...
##  $ X2017..YR2017.: chr  "57.783" "5.23162237725058" "12.9075342465753" "93.9" ...
##  $ X2018..YR2018.: chr  "51.029" "4.53787889681146" "12.9075342465753" "99.5" ...
##  $ X2019..YR2019.: chr  "46.153" "5.3185559967348" "12.9075342465753" "99.6" ...
##  $ X2020..YR2020.: chr  "39.866" "6.35703367575527" "12.9075342465753" "98.8" ...
##  $ X2021..YR2021.: chr  "39.065" "7.30630885522762" ".." ".." ...
##  $ X2022..YR2022.: chr  "37.932" "6.63989827840203" ".." ".." ...
##  $ X2023..YR2023.: chr  ".." "6.05950876262449" ".." ".." ...
# Per-column summary of the raw import.
summary(WorldData)
##  Country.Name       Country.Code       Series.Name        Series.Code       
##  Length:1085        Length:1085        Length:1085        Length:1085       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  X2014..YR2014.     X2015..YR2015.     X2016..YR2016.     X2017..YR2017.    
##  Length:1085        Length:1085        Length:1085        Length:1085       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  X2018..YR2018.     X2019..YR2019.     X2020..YR2020.     X2021..YR2021.    
##  Length:1085        Length:1085        Length:1085        Length:1085       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  X2022..YR2022.     X2023..YR2023.    
##  Length:1085        Length:1085       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character
# Reshape to long format: every column whose name starts with "X" (the year
# columns) is stacked into a Year/Value pair. The year label is cleaned as
# part of the pivot by removing the "X" prefix and everything from the first
# ".." onwards, e.g. "X2014..YR2014." becomes "2014". Year and Value are both
# still character at this point.
tidy_WorldData <- WorldData %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "Year",
    values_to = "Value",
    names_transform = list(
      Year = function(label) gsub("X|\\.\\..*", "", label)
    )
  )
head(tidy_WorldData, n = 6L)
## # A tibble: 6 × 6
##   Country.Name Country.Code Series.Name                  Series.Code Year  Value
##   <chr>        <chr>        <chr>                        <chr>       <chr> <chr>
## 1 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT 2014  67.7…
## 2 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT 2015  65.3…
## 3 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT 2016  61.8…
## 4 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT 2017  57.7…
## 5 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT 2018  51.0…
## 6 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT 2019  46.1…
# Average each indicator per country and year.
#
# The export marks missing observations with "..". Map that sentinel to NA
# explicitly before coercing, rather than letting as.numeric() emit one
# "NAs introduced by coercion" warning per affected group; any coercion
# warning that still appears then points at an unexpected non-numeric value.
# `%in%` is used (not `==`) so the replacement also works if Value has already
# been converted to numeric.
#
# `.groups = "drop_last"` spells out summarise()'s default regrouping: the
# result stays grouped by Country.Name and Series.Name, exactly as before, but
# without the informational message.
World_summary <- tidy_WorldData %>%
  group_by(Country.Name, Series.Name, Year) %>%
  summarize(
    Average_Value = mean(
      as.numeric(replace(Value, Value %in% "..", NA)),
      na.rm = TRUE
    ),
    .groups = "drop_last"
  )
head(World_summary)
## # A tibble: 6 × 4
## # Groups:   Country.Name, Series.Name [1]
##   Country.Name Series.Name Year  Average_Value
##   <chr>        <chr>       <chr>         <dbl>
## 1 ""           ""          2014            NaN
## 2 ""           ""          2015            NaN
## 3 ""           ""          2016            NaN
## 4 ""           ""          2017            NaN
## 5 ""           ""          2018            NaN
## 6 ""           ""          2019            NaN
# Convert Year and Value from character to numeric.
#
# ".." is the export's marker for a missing observation, so it is turned into
# NA up front instead of relying on as.numeric() to coerce it with a warning.
# A coercion warning from this step would now signal some other, unexpected
# non-numeric value. `%in%` keeps the replacement safe to re-run when Value is
# already numeric.
tidy_WorldData <- tidy_WorldData %>%
  mutate(
    Year = as.numeric(Year),
    Value = as.numeric(replace(Value, Value %in% "..", NA))
  )
head(tidy_WorldData)
## # A tibble: 6 × 6
##   Country.Name Country.Code Series.Name                  Series.Code  Year Value
##   <chr>        <chr>        <chr>                        <chr>       <dbl> <dbl>
## 1 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2014  67.8
## 2 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2015  65.4
## 3 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2016  61.9
## 4 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2017  57.8
## 5 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2018  51.0
## 6 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2019  46.2
# Per-column summary of the long-format data after numeric conversion.
summary(tidy_WorldData)
##  Country.Name       Country.Code       Series.Name        Series.Code       
##  Length:10850       Length:10850       Length:10850       Length:10850      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       Year          Value           
##  Min.   :2014   Min.   :-3.090e+11  
##  1st Qu.:2016   1st Qu.: 6.000e+00  
##  Median :2018   Median : 3.300e+01  
##  Mean   :2018   Mean   : 2.981e+11  
##  3rd Qu.:2021   3rd Qu.: 1.208e+04  
##  Max.   :2023   Max.   : 3.439e+13  
##                 NA's   :2597
# Keep only observations that carry a value: rows whose Value is NA are
# removed from the long-format data.
tidy_WorldData <- drop_na(tidy_WorldData, Value)
head(tidy_WorldData, n = 6L)
## # A tibble: 6 × 6
##   Country.Name Country.Code Series.Name                  Series.Code  Year Value
##   <chr>        <chr>        <chr>                        <chr>       <dbl> <dbl>
## 1 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2014  67.8
## 2 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2015  65.4
## 3 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2016  61.9
## 4 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2017  57.8
## 5 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2018  51.0
## 6 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2019  46.2
# Extract the adolescent fertility series (births per 1,000 women ages 15-19).
# Rows are selected by series code ("SP.ADO.TFRT") rather than by the long
# display name, so the filter does not depend on the exact wording and
# punctuation of the label.
fertility_data <- tidy_WorldData %>%
  filter(Series.Code == "SP.ADO.TFRT")
head(fertility_data)
## # A tibble: 6 × 6
##   Country.Name Country.Code Series.Name                  Series.Code  Year Value
##   <chr>        <chr>        <chr>                        <chr>       <dbl> <dbl>
## 1 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2014  67.8
## 2 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2015  65.4
## 3 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2016  61.9
## 4 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2017  57.8
## 5 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2018  51.0
## 6 Argentina    ARG          Adolescent fertility rate (… SP.ADO.TFRT  2019  46.2