Import libraries

library(dplyr)
library(tidyr)
library(ggplot2)
library(knitr)

Collect the data

This data was collected on Slack from the student: FOMBA KASSOH.

df3 = read.csv("https://raw.githubusercontent.com/Kossi-Akplaka/Data607-data_acquisition_and_management/main/Project%202/Data3-USA-Development-Indicators.csv")
kable(head(df3)) 
Country.Name Country.Code Series.Name Series.Code X1990..YR1990. X2000..YR2000. X2013..YR2013. X2014..YR2014. X2015..YR2015. X2016..YR2016. X2017..YR2017. X2018..YR2018. X2019..YR2019. X2020..YR2020. X2021..YR2021. X2022..YR2022.
United States USA Population, total SP.POP.TOTL 249623000 282162411 316059947 318386329 320738994 323071755 325122128 326838199 328329953 331511512 332031554 333287557
United States USA Population growth (annual %) SP.POP.GROW 1 1 1 1 1 1 1 1 0 1 0 0
United States USA Surface area (sq. km) AG.SRF.TOTL.K2 9629090 9632030 9831510 9831510 9831510 9831510 9831510 9831510 9831510 9831510 9831510 ..
United States USA Population density (people per sq. km of land area) EN.POP.DNST 27 31 35 35 35 35 36 36 36 36 36 ..
United States USA Poverty headcount ratio at national poverty lines (% of population) SI.POV.NAHC .. .. .. .. .. .. .. .. .. .. .. ..
United States USA Poverty headcount ratio at $2.15 a day (2017 PPP) (% of population) SI.POV.DDAY 1 1 1 1 1 1 1 1 1 0 0 ..

Let’s tidy the data

Remove columns 56 to 60

df3 <- df3[1:55,]

We can transform the data from wide to long

df3_long <- df3 %>%
  gather(Year, Count, X1990..YR1990.:X2022..YR2022.)
kable(head(df3_long))
Country.Name Country.Code Series.Name Series.Code Year Count
United States USA Population, total SP.POP.TOTL X1990..YR1990. 249623000
United States USA Population growth (annual %) SP.POP.GROW X1990..YR1990. 1
United States USA Surface area (sq. km) AG.SRF.TOTL.K2 X1990..YR1990. 9629090
United States USA Population density (people per sq. km of land area) EN.POP.DNST X1990..YR1990. 27
United States USA Poverty headcount ratio at national poverty lines (% of population) SI.POV.NAHC X1990..YR1990. ..
United States USA Poverty headcount ratio at $2.15 a day (2017 PPP) (% of population) SI.POV.DDAY X1990..YR1990. 1

Now we can tidy the column Year

df3_long$Year <- as.integer(sub("X(\\d+)\\.\\.YR\\d+\\.", "\\1", df3_long$Year))

head(df3_long$Year)
## [1] 1990 1990 1990 1990 1990 1990

Analysis

Let’s create another data frame with the total population of USA

USA_pop_total <- df3_long %>% 
  filter(Series.Name == "Population, total")
kable(USA_pop_total)
Country.Name Country.Code Series.Name Series.Code Year Count
United States USA Population, total SP.POP.TOTL 1990 249623000
United States USA Population, total SP.POP.TOTL 2000 282162411
United States USA Population, total SP.POP.TOTL 2013 316059947
United States USA Population, total SP.POP.TOTL 2014 318386329
United States USA Population, total SP.POP.TOTL 2015 320738994
United States USA Population, total SP.POP.TOTL 2016 323071755
United States USA Population, total SP.POP.TOTL 2017 325122128
United States USA Population, total SP.POP.TOTL 2018 326838199
United States USA Population, total SP.POP.TOTL 2019 328329953
United States USA Population, total SP.POP.TOTL 2020 331511512
United States USA Population, total SP.POP.TOTL 2021 332031554
United States USA Population, total SP.POP.TOTL 2022 333287557

Now we can plot the total population of USA between 1990 to 2022

ggplot(data = USA_pop_total, aes(x = Year, y = Count, group = 1)) +
  geom_line()

The population of USA grew from 249623000 to 333287557.

LS0tDQp0aXRsZTogIlByb2plY3QyLWRhdGEzIg0KYXV0aG9yOiAiS29zc2kgQWtwbGFrYSINCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCINCm91dHB1dDogb3BlbmludHJvOjpsYWJfcmVwb3J0DQotLS0NCg0KIyMgSW1wb3J0IGxpYnJhcmllcw0KYGBge3IgaW1wb3J0LWxpYnJhcmllcywgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KHRpZHlyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShrbml0cikNCmBgYA0KDQojIyBDb2xsZWN0IHRoZSBkYXRhDQpUaGlzIGRhdGEgd2FzIGNvbGxlY3RlZCBvbiBTbGFjayBmcm9tIHRoZSBzdHVkZW50OiBGT01CQSBLQVNTT0guDQoNCmBgYHtyfQ0KZGYzID0gcmVhZC5jc3YoImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9Lb3NzaS1Ba3BsYWthL0RhdGE2MDctZGF0YV9hY3F1aXNpdGlvbl9hbmRfbWFuYWdlbWVudC9tYWluL1Byb2plY3QlMjAyL0RhdGEzLVVTQS1EZXZlbG9wbWVudC1JbmRpY2F0b3JzLmNzdiIpDQprYWJsZShoZWFkKGRmMykpIA0KYGBgDQojIyBMZXQncyB0aWR5IHRoZSBkYXRhDQpSZW1vdmUgY29sdW1ucyA1NiB0byA2MA0KYGBge3J9DQpkZjMgPC0gZGYzWzE6NTUsXQ0KYGBgDQoNCldlIGNhbiB0cmFuc2Zvcm0gdGhlIGRhdGEgZnJvbSB3aWRlIHRvIGxvbmcNCg0KYGBge3J9DQpkZjNfbG9uZyA8LSBkZjMgJT4lDQogIGdhdGhlcihZZWFyLCBDb3VudCwgWDE5OTAuLllSMTk5MC46WDIwMjIuLllSMjAyMi4pDQprYWJsZShoZWFkKGRmM19sb25nKSkNCmBgYA0KTm93IHdlIGNhbiB0aWR5IHRoZSBjb2x1bW4gWWVhcg0KYGBge3J9DQpkZjNfbG9uZyRZZWFyIDwtIGFzLmludGVnZXIoc3ViKCJYKFxcZCspXFwuXFwuWVJcXGQrXFwuIiwgIlxcMSIsIGRmM19sb25nJFllYXIpKQ0KDQpoZWFkKGRmM19sb25nJFllYXIpDQpgYGANCiMjIEFuYWx5c2lzDQpMZXQncyBjcmVhdGUgYW5vdGhlciBkYXRhIGZyYW1lIHdpdGggdGhlIHRvdGFsIHBvcHVsYXRpb24gb2YgVVNBDQoNCmBgYHtyfQ0KVVNBX3BvcF90b3RhbCA8LSBkZjNfbG9uZyAlPiUgDQogIGZpbHRlcihTZXJpZXMuTmFtZSA9PSAiUG9wdWxhdGlvbiwgdG90YWwiKQ0Ka2FibGUoVVNBX3BvcF90b3RhbCkNCmBgYA0KDQpOb3cgd2UgY2FuIHBsb3QgdGhlIHRvdGFsIHBvcHVsYXRpb24gb2YgVVNBIGJldHdlZW4gMTk5MCB0byAyMDIyDQoNCmBgYHtyfQ0KZ2dwbG90KGRhdGEgPSBVU0FfcG9wX3RvdGFsLCBhZXMoeCA9IFllYXIsIHkgPSBDb3VudCwgZ3JvdXAgPSAxKSkgKw0KICBnZW9tX2xpbmUoKQ0KYGBgDQoNClRoZSBwb3B1bGF0aW9uIG9mIFVTQSBncmV3IGZyb20gYHIgbWluKFVTQV9wb3BfdG90YWwkQ291bnQpYCB0byBgciBtYXgoVVNBX3BvcF90b3RhbCRDb3VudClgLg0KDQoNCg==