library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(stringr)
Data 3 Population, total https://data.worldbank.org/indicator/SP.POP.TOTL
ï· Create a .CSV file (or optionally, a MySQL database!) that includes all of the information included in the dataset. Youâre encouraged to use a âwideâ structure similar to how the information appears in the discussion item, so that you can practice tidying and transformations as described below. ï· Read the information from your .CSV file into R, and use tidyr and dplyr as needed to tidy and transform your data. [Most of your grade will be based on this step!]
pop <- read.csv("API_SP.POP.TOTL_DS2_en_csv_v2_10473719.csv", skip = 4)
View first few rows of the data
head(pop)
## Country.Name Country.Code Indicator.Name Indicator.Code X1960
## 1 Aruba ABW Population, total SP.POP.TOTL 54211
## 2 Afghanistan AFG Population, total SP.POP.TOTL 8996351
## 3 Angola AGO Population, total SP.POP.TOTL 5643182
## 4 Albania ALB Population, total SP.POP.TOTL 1608800
## 5 Andorra AND Population, total SP.POP.TOTL 13411
## 6 Arab World ARB Population, total SP.POP.TOTL 92490932
## X1961 X1962 X1963 X1964 X1965 X1966 X1967
## 1 55438 56225 56695 57032 57360 57715 58055
## 2 9166764 9345868 9533954 9731361 9938414 10152331 10372630
## 3 5753024 5866061 5980417 6093321 6203299 6309770 6414995
## 4 1659800 1711319 1762621 1814135 1864791 1914573 1965598
## 5 14375 15370 16412 17469 18549 19647 20758
## 6 95044497 97682294 100411076 103239902 106174988 109230593 112406932
## X1968 X1969 X1970 X1971 X1972 X1973 X1974
## 1 58386 58726 59063 59440 59840 60243 60528
## 2 10604346 10854428 11126123 11417825 11721940 12027822 12321541
## 3 6523791 6642632 6776381 6927269 7094834 7277960 7474338
## 4 2022272 2081695 2135479 2187853 2243126 2296752 2350124
## 5 21890 23058 24276 25559 26892 28232 29520
## 6 115680165 119016542 122398374 125807419 129269375 132863416 136696761
## X1975 X1976 X1977 X1978 X1979 X1980 X1981
## 1 60657 60586 60366 60103 59980 60096 60567
## 2 12590286 12840299 13067538 13237734 13306695 13248370 13053954
## 3 7682479 7900997 8130988 8376147 8641521 8929900 9244507
## 4 2404831 2458526 2513546 2566266 2617832 2671997 2726056
## 5 30705 31777 32771 33737 34818 36067 37500
## 6 140843298 145332378 150133054 155183724 160392488 165689490 171051950
## X1982 X1983 X1984 X1985 X1986 X1987 X1988
## 1 61345 62201 62836 63026 62644 61833 61079
## 2 12749645 12389269 12047115 11783050 11601041 11502761 11540888
## 3 9582156 9931562 10277321 10609042 10921037 11218268 11513968
## 4 2784278 2843960 2904429 2964762 3022635 3083605 3142336
## 5 39114 40867 42706 44600 46517 48455 50434
## 6 176490084 182005827 187610756 193310301 199093767 204942549 210844771
## X1989 X1990 X1991 X1992 X1993 X1994 X1995
## 1 61032 62149 64622 68235 72504 76700 80324
## 2 11777609 12249114 12993657 13981231 15095099 16172719 17099541
## 3 11827237 12171441 12553446 12968345 13403734 13841301 14268994
## 4 3227943 3286542 3266790 3247039 3227287 3207536 3187784
## 5 52448 54509 56671 58888 60971 62677 63850
## 6 216787402 224735446 230829868 235037179 241286091 247435930 255029671
## X1996 X1997 X1998 X1999 X2000 X2001 X2002
## 1 83200 85451 87277 89005 90853 92898 94992
## 2 17822884 18381605 18863999 19403676 20093756 20966463 21979923
## 3 14682284 15088981 15504318 15949766 16440924 16983266 17572649
## 4 3168033 3148281 3128530 3108778 3089027 3060173 3051010
## 5 64360 64327 64142 64370 65390 67341 70049
## 6 260843462 266575075 272235146 277962869 283832016 289850357 296026575
## X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1 97017 98737 100031 100832 101220 101353 101453
## 2 23064851 24118979 25070798 25893450 26616792 27294031 28004331
## 3 18203369 18865716 19552542 20262399 20997687 21759420 22549547
## 4 3039616 3026939 3011487 2992547 2970017 2947314 2927519
## 5 73182 76244 78867 80991 82683 83861 84462
## 6 302434519 309162029 316264728 323773264 331653797 339825483 348145094
## X2010 X2011 X2012 X2013 X2014 X2015 X2016
## 1 101669 102053 102577 103187 103795 104341 104822
## 2 28803167 29708599 30696958 31731688 32758020 33736494 34656032
## 3 23369131 24218565 25096150 25998340 26920466 27859305 28813463
## 4 2913021 2905195 2900401 2895092 2889104 2880703 2876101
## 5 84449 83751 82431 80788 79223 78014 77281
## 6 356508908 364895878 373306993 381702086 390043028 398304960 406452690
## X2017 X2018 X
## 1 105264 NA NA
## 2 35530081 NA NA
## 3 29784193 NA NA
## 4 2873457 NA NA
## 5 76965 NA NA
## 6 414491886 NA NA
Drop unnecessary variables
pop <- pop %>%
select("Country.Name",starts_with("X"),-X)
head(pop)
## Country.Name X1960 X1961 X1962 X1963 X1964 X1965
## 1 Aruba 54211 55438 56225 56695 57032 57360
## 2 Afghanistan 8996351 9166764 9345868 9533954 9731361 9938414
## 3 Angola 5643182 5753024 5866061 5980417 6093321 6203299
## 4 Albania 1608800 1659800 1711319 1762621 1814135 1864791
## 5 Andorra 13411 14375 15370 16412 17469 18549
## 6 Arab World 92490932 95044497 97682294 100411076 103239902 106174988
## X1966 X1967 X1968 X1969 X1970 X1971 X1972
## 1 57715 58055 58386 58726 59063 59440 59840
## 2 10152331 10372630 10604346 10854428 11126123 11417825 11721940
## 3 6309770 6414995 6523791 6642632 6776381 6927269 7094834
## 4 1914573 1965598 2022272 2081695 2135479 2187853 2243126
## 5 19647 20758 21890 23058 24276 25559 26892
## 6 109230593 112406932 115680165 119016542 122398374 125807419 129269375
## X1973 X1974 X1975 X1976 X1977 X1978 X1979
## 1 60243 60528 60657 60586 60366 60103 59980
## 2 12027822 12321541 12590286 12840299 13067538 13237734 13306695
## 3 7277960 7474338 7682479 7900997 8130988 8376147 8641521
## 4 2296752 2350124 2404831 2458526 2513546 2566266 2617832
## 5 28232 29520 30705 31777 32771 33737 34818
## 6 132863416 136696761 140843298 145332378 150133054 155183724 160392488
## X1980 X1981 X1982 X1983 X1984 X1985 X1986
## 1 60096 60567 61345 62201 62836 63026 62644
## 2 13248370 13053954 12749645 12389269 12047115 11783050 11601041
## 3 8929900 9244507 9582156 9931562 10277321 10609042 10921037
## 4 2671997 2726056 2784278 2843960 2904429 2964762 3022635
## 5 36067 37500 39114 40867 42706 44600 46517
## 6 165689490 171051950 176490084 182005827 187610756 193310301 199093767
## X1987 X1988 X1989 X1990 X1991 X1992 X1993
## 1 61833 61079 61032 62149 64622 68235 72504
## 2 11502761 11540888 11777609 12249114 12993657 13981231 15095099
## 3 11218268 11513968 11827237 12171441 12553446 12968345 13403734
## 4 3083605 3142336 3227943 3286542 3266790 3247039 3227287
## 5 48455 50434 52448 54509 56671 58888 60971
## 6 204942549 210844771 216787402 224735446 230829868 235037179 241286091
## X1994 X1995 X1996 X1997 X1998 X1999 X2000
## 1 76700 80324 83200 85451 87277 89005 90853
## 2 16172719 17099541 17822884 18381605 18863999 19403676 20093756
## 3 13841301 14268994 14682284 15088981 15504318 15949766 16440924
## 4 3207536 3187784 3168033 3148281 3128530 3108778 3089027
## 5 62677 63850 64360 64327 64142 64370 65390
## 6 247435930 255029671 260843462 266575075 272235146 277962869 283832016
## X2001 X2002 X2003 X2004 X2005 X2006 X2007
## 1 92898 94992 97017 98737 100031 100832 101220
## 2 20966463 21979923 23064851 24118979 25070798 25893450 26616792
## 3 16983266 17572649 18203369 18865716 19552542 20262399 20997687
## 4 3060173 3051010 3039616 3026939 3011487 2992547 2970017
## 5 67341 70049 73182 76244 78867 80991 82683
## 6 289850357 296026575 302434519 309162029 316264728 323773264 331653797
## X2008 X2009 X2010 X2011 X2012 X2013 X2014
## 1 101353 101453 101669 102053 102577 103187 103795
## 2 27294031 28004331 28803167 29708599 30696958 31731688 32758020
## 3 21759420 22549547 23369131 24218565 25096150 25998340 26920466
## 4 2947314 2927519 2913021 2905195 2900401 2895092 2889104
## 5 83861 84462 84449 83751 82431 80788 79223
## 6 339825483 348145094 356508908 364895878 373306993 381702086 390043028
## X2015 X2016 X2017 X2018
## 1 104341 104822 105264 NA
## 2 33736494 34656032 35530081 NA
## 3 27859305 28813463 29784193 NA
## 4 2880703 2876101 2873457 NA
## 5 78014 77281 76965 NA
## 6 398304960 406452690 414491886 NA
Remove unncessary variable X2018
pop <- pop %>%
select(-X2018)
head(pop)
## Country.Name X1960 X1961 X1962 X1963 X1964 X1965
## 1 Aruba 54211 55438 56225 56695 57032 57360
## 2 Afghanistan 8996351 9166764 9345868 9533954 9731361 9938414
## 3 Angola 5643182 5753024 5866061 5980417 6093321 6203299
## 4 Albania 1608800 1659800 1711319 1762621 1814135 1864791
## 5 Andorra 13411 14375 15370 16412 17469 18549
## 6 Arab World 92490932 95044497 97682294 100411076 103239902 106174988
## X1966 X1967 X1968 X1969 X1970 X1971 X1972
## 1 57715 58055 58386 58726 59063 59440 59840
## 2 10152331 10372630 10604346 10854428 11126123 11417825 11721940
## 3 6309770 6414995 6523791 6642632 6776381 6927269 7094834
## 4 1914573 1965598 2022272 2081695 2135479 2187853 2243126
## 5 19647 20758 21890 23058 24276 25559 26892
## 6 109230593 112406932 115680165 119016542 122398374 125807419 129269375
## X1973 X1974 X1975 X1976 X1977 X1978 X1979
## 1 60243 60528 60657 60586 60366 60103 59980
## 2 12027822 12321541 12590286 12840299 13067538 13237734 13306695
## 3 7277960 7474338 7682479 7900997 8130988 8376147 8641521
## 4 2296752 2350124 2404831 2458526 2513546 2566266 2617832
## 5 28232 29520 30705 31777 32771 33737 34818
## 6 132863416 136696761 140843298 145332378 150133054 155183724 160392488
## X1980 X1981 X1982 X1983 X1984 X1985 X1986
## 1 60096 60567 61345 62201 62836 63026 62644
## 2 13248370 13053954 12749645 12389269 12047115 11783050 11601041
## 3 8929900 9244507 9582156 9931562 10277321 10609042 10921037
## 4 2671997 2726056 2784278 2843960 2904429 2964762 3022635
## 5 36067 37500 39114 40867 42706 44600 46517
## 6 165689490 171051950 176490084 182005827 187610756 193310301 199093767
## X1987 X1988 X1989 X1990 X1991 X1992 X1993
## 1 61833 61079 61032 62149 64622 68235 72504
## 2 11502761 11540888 11777609 12249114 12993657 13981231 15095099
## 3 11218268 11513968 11827237 12171441 12553446 12968345 13403734
## 4 3083605 3142336 3227943 3286542 3266790 3247039 3227287
## 5 48455 50434 52448 54509 56671 58888 60971
## 6 204942549 210844771 216787402 224735446 230829868 235037179 241286091
## X1994 X1995 X1996 X1997 X1998 X1999 X2000
## 1 76700 80324 83200 85451 87277 89005 90853
## 2 16172719 17099541 17822884 18381605 18863999 19403676 20093756
## 3 13841301 14268994 14682284 15088981 15504318 15949766 16440924
## 4 3207536 3187784 3168033 3148281 3128530 3108778 3089027
## 5 62677 63850 64360 64327 64142 64370 65390
## 6 247435930 255029671 260843462 266575075 272235146 277962869 283832016
## X2001 X2002 X2003 X2004 X2005 X2006 X2007
## 1 92898 94992 97017 98737 100031 100832 101220
## 2 20966463 21979923 23064851 24118979 25070798 25893450 26616792
## 3 16983266 17572649 18203369 18865716 19552542 20262399 20997687
## 4 3060173 3051010 3039616 3026939 3011487 2992547 2970017
## 5 67341 70049 73182 76244 78867 80991 82683
## 6 289850357 296026575 302434519 309162029 316264728 323773264 331653797
## X2008 X2009 X2010 X2011 X2012 X2013 X2014
## 1 101353 101453 101669 102053 102577 103187 103795
## 2 27294031 28004331 28803167 29708599 30696958 31731688 32758020
## 3 21759420 22549547 23369131 24218565 25096150 25998340 26920466
## 4 2947314 2927519 2913021 2905195 2900401 2895092 2889104
## 5 83861 84462 84449 83751 82431 80788 79223
## 6 339825483 348145094 356508908 364895878 373306993 381702086 390043028
## X2015 X2016 X2017
## 1 104341 104822 105264
## 2 33736494 34656032 35530081
## 3 27859305 28813463 29784193
## 4 2880703 2876101 2873457
## 5 78014 77281 76965
## 6 398304960 406452690 414491886
Drop unnecessary rows
Convert wide to long format
pop <- gather(pop, "Year","Population",-Country.Name)
head(pop)
## Country.Name Year Population
## 1 Aruba X1960 54211
## 2 Afghanistan X1960 8996351
## 3 Angola X1960 5643182
## 4 Albania X1960 1608800
## 5 Andorra X1960 13411
## 6 Arab World X1960 92490932
Remove X from Year
pop$Year <- str_replace(pop$Year,"X","")
See data type of the variables
sapply(pop, class)
## Country.Name Year Population
## "factor" "character" "numeric"
Correct data type by converting Year to Numeric
pop$Year <- as.numeric(pop$Year)
See again data type of the variables
sapply(pop, class)
## Country.Name Year Population
## "factor" "numeric" "numeric"
Rename variable names
colnames(pop)[1] <- "Country"
Show first 10 rows
head(pop,10)
## Country Year Population
## 1 Aruba 1960 54211
## 2 Afghanistan 1960 8996351
## 3 Angola 1960 5643182
## 4 Albania 1960 1608800
## 5 Andorra 1960 13411
## 6 Arab World 1960 92490932
## 7 United Arab Emirates 1960 92634
## 8 Argentina 1960 20619075
## 9 Armenia 1960 1874120
## 10 American Samoa 1960 20013
ï· Perform the analysis requested in the discussion item. Worlds total population change over the years
pop %>%
group_by(Year)%>%
summarise("Total Population" = sum(Population, na.rm = TRUE))%>%
ggplot(aes(Year,`Total Population`))+geom_line()+theme_classic()+ggtitle("World Population Change Over The years")+ylab("Population")
From the graph we can see that world populatin has increased over the years.
Top 10 countries with highest population in 2017
pop %>%
filter(Year == max(Year))%>%
arrange(-Population)%>%
head(30)
## Country Year Population
## 1 World 2017 7530360149
## 2 IDA & IBRD total 2017 6353204601
## 3 Low & middle income 2017 6281293921
## 4 Middle income 2017 5548845363
## 5 IBRD only 2017 4743263932
## 6 Early-demographic dividend 2017 3213426923
## 7 Lower middle income 2017 2972642807
## 8 Upper middle income 2017 2576202556
## 9 East Asia & Pacific 2017 2314364990
## 10 Late-demographic dividend 2017 2278227192
## 11 East Asia & Pacific (excluding high income) 2017 2068308373
## 12 East Asia & Pacific (IDA & IBRD countries) 2017 2042783496
## 13 South Asia 2017 1788388852
## 14 South Asia (IDA & IBRD) 2017 1788388852
## 15 IDA total 2017 1609940669
## 16 China 2017 1386395000
## 17 India 2017 1339180127
## 18 OECD members 2017 1300865255
## 19 High income 2017 1249066228
## 20 Post-demographic dividend 2017 1107374909
## 21 IDA only 2017 1076917211
## 22 Sub-Saharan Africa 2017 1061107721
## 23 Sub-Saharan Africa (IDA & IBRD countries) 2017 1061107721
## 24 Sub-Saharan Africa (excluding high income) 2017 1061011878
## 25 Least developed countries: UN classification 2017 1002485957
## 26 Europe & Central Asia 2017 915545801
## 27 Pre-demographic dividend 2017 904399841
## 28 Heavily indebted poor countries (HIPC) 2017 765112280
## 29 Low income 2017 732448558
## 30 Latin America & Caribbean 2017 644137666
Top 10 countries with lowest population in 2017
pop %>%
filter(Year == max(Year))%>%
arrange(Population)%>%
head(10)
## Country Year Population
## 1 Tuvalu 2017 11192
## 2 Nauru 2017 13649
## 3 Palau 2017 21729
## 4 British Virgin Islands 2017 31196
## 5 St. Martin (French part) 2017 32125
## 6 San Marino 2017 33400
## 7 Gibraltar 2017 34571
## 8 Turks and Caicos Islands 2017 35446
## 9 Liechtenstein 2017 37922
## 10 Monaco 2017 38695
Conclusion: We conclude that world population has increased over the years. China and India has the highest population in 2017. Tuvalu has the lowest population in 2017
ï· Your code should be in an R Markdown file, posted to rpubs.com, and should include narrative descriptions of your data cleanup work, analysis, and conclusions.