The following data was downloaded from the Gapminder table "Population aged 20-39 years, both sexes (%)".
I would like to investigate how the share of the population in the 20-39 age group changes over time.
library(tidyr)
library(ggplot2)
library(ggthemes)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the Gapminder table of population aged 20-39 (% of total, both sexes).
# The first CSV column is used as row names rather than kept as data, and
# check.names = FALSE keeps the year headers ("1950", "1955", ...) as-is
# instead of letting read.csv() rewrite them into syntactic names.
tmp <- read.csv("clean.csv", row.names = 1, check.names = FALSE)
head(tmp)
## Country 1950 1955 1960 1965 1970 1975
## 1 Afghanistan 28.29795 28.43349 28.48970 28.48662 28.55816 28.04832
## 2 Albania 26.60626 26.88406 27.56052 27.60835 26.94107 27.48855
## 3 Algeria 27.86155 27.70229 26.90064 25.84081 23.27611 23.74204
## 4 Angola 28.94356 28.42640 27.67019 26.77080 25.87539 25.85144
## 5 Argentina 32.70363 31.70320 30.47766 29.35833 28.73688 28.71751
## 6 Armenia 26.77515 28.53487 32.54818 29.75057 26.84670 28.01556
## 1980 1985 1990 1995 2000 2005 2010 2015
## 1 27.59733 27.21061 26.79068 26.74187 26.70789 26.96184 27.52782 28.25814
## 2 29.36775 31.27113 33.13070 31.82253 28.61799 28.40142 29.66235 31.48205
## 3 24.66773 26.70498 28.76355 30.51015 33.32022 35.83952 36.88980 36.35433
## 4 25.98676 26.25375 26.54975 26.43376 26.79647 27.17535 28.09078 29.13785
## 5 28.49329 28.35781 28.00960 28.05993 29.26176 29.99923 30.65952 30.85294
## 6 29.59974 33.37328 33.80361 30.63315 28.30679 27.80137 30.54028 32.13376
## 2020 2025 2030 2035 2040 2045 2050
## 1 29.05972 29.72047 30.16447 30.76133 31.50975 32.32804 33.06464
## 2 32.22522 29.84987 26.92420 24.76526 23.65464 24.14925 23.85710
## 3 33.45475 30.37476 28.45485 27.63377 28.13748 27.92545 26.76127
## 4 29.83065 30.76643 31.24774 31.83710 32.51237 32.74464 32.84910
## 5 29.91603 29.13497 28.47015 27.78811 27.14543 26.59227 25.78619
## 6 30.54156 28.08671 25.33123 24.38869 25.36069 25.17051 24.03049
# Reshape from wide (one column per year) to long: one row per Country/year
# pair, with the percentage in `n`. Every column except Country is gathered,
# so the call does not depend on how many year columns the file has.
# NOTE(review): gather() is superseded by tidyr::pivot_longer(); it is kept
# here because pivot_longer() returns rows in a different order.
df <- gather(tmp, "year", "n", -Country)
names(df)
## [1] "Country" "year" "n"
# Preview the first six rows of the long-format table.
head(df, n = 6L)
## Country year n
## 1 Afghanistan 1950 28.29795
## 2 Albania 1950 26.60626
## 3 Algeria 1950 27.86155
## 4 Angola 1950 28.94356
## 5 Argentina 1950 32.70363
## 6 Armenia 1950 26.77515
# Plot the share of 20-39 year olds in China over time.
# `year` comes out of gather() as text (the former column names), so it is
# parsed to a number here. That gives a real time axis with numeric breaks
# and lets geom_line() connect the points without the `group = 1` workaround
# that a discrete axis requires.
china <- subset(df, Country == "China")
china$year <- as.integer(as.character(china$year))
ggplot(china, aes(x = year, y = n)) +
  geom_line() +
  scale_x_continuous(breaks = seq(1940, 2060, 5)) +
  xlab("Year") +
  ylab("Percentage of 20-39 population in China")
The graph above shows how the percentage of the population aged 20-39 has changed in China. We can see that the share has been dropping since the year 2000.
The following graph shows the same data for 196 countries.
# One line per country, all countries at once (colour and grouping are both
# keyed on Country).
ggplot(df, aes(x = year, y = n, group = Country, colour = Country)) +
  geom_line()
The graph is too chaotic, so I will focus on the following countries and regions instead:
- United States
- Japan
- Canada
- China
- Hong Kong
# Keep only the five countries/regions of interest, then draw one coloured
# line per country. The x-axis labels are rotated 45 degrees.
sub_df <- df %>%
  filter(
    Country %in% c("United States", "Japan", "Canada", "China", "Hong Kong, China")
  )
ggplot(sub_df, aes(x = year, y = n, group = Country, colour = Country)) +
  geom_line() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
As we can see from the graph, the share of people aged 20-39 is dropping in these countries and regions. This may be caused by a decreasing trend in total population. To check that, I downloaded and imported the total population data:
# Load total population by country and year. check.names = FALSE keeps the
# year headers ("1950", "1951", ...) unchanged.
dfpop <- read.csv("cleaned total population.csv", check.names = FALSE)
# Drop the first column so that `country` becomes column 1.
# NOTE(review): presumably a leftover row index from the cleaning step — confirm.
dfpop[1] <- NULL
str(dfpop)
## 'data.frame': 241 obs. of 65 variables:
## $ country: Factor w/ 241 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ 1950 : num 8151455 1215002 8752997 18937 6197 ...
## $ 1951 : num 8276820 1239971 8953373 19287 6692 ...
## $ 1952 : num 8407148 1269302 9140990 19525 7247 ...
## $ 1953 : num 8542906 1302536 9326099 19659 7856 ...
## $ 1954 : num 8684494 1339240 9515990 19706 8514 ...
## $ 1955 : num 8832253 1378998 9715002 19688 9217 ...
## $ 1956 : num 8986449 1421417 9924347 19640 9964 ...
## $ 1957 : num 9147286 1466124 10142284 19603 10754 ...
## $ 1958 : num 9314915 1512765 10364614 19624 11585 ...
## $ 1959 : num 9489453 1561012 10585506 19757 12460 ...
## $ 1960 : num 9671046 1610565 10799997 20041 13377 ...
## $ 1961 : num 9859928 1661158 11006643 20500 14337 ...
## $ 1962 : num 10056480 1712563 11209845 21124 15337 ...
## $ 1963 : num 10261254 1764593 11420845 21870 16373 ...
## $ 1964 : num 10474903 1817098 11654905 22672 17438 ...
## $ 1965 : num 10697983 1869942 11923002 23480 18529 ...
## $ 1966 : num 10927724 1922993 12229853 24283 19640 ...
## $ 1967 : num 11163656 1976140 12572629 25087 20772 ...
## $ 1968 : num 11411022 2029314 12945462 25869 21931 ...
## $ 1969 : num 11676990 2082474 13338918 26608 23127 ...
## $ 1970 : num 11964906 2135599 13746185 27288 24364 ...
## $ 1971 : num 12273101 2188650 14165889 27907 25656 ...
## $ 1972 : num 12593688 2241623 14600659 28470 26997 ...
## $ 1973 : num 12915499 2294578 15052371 28983 28357 ...
## $ 1974 : num 13223928 2347607 15524137 29453 29688 ...
## $ 1975 : num 13505544 2400801 16018195 29897 30967 ...
## $ 1976 : num 13766792 2454255 16533323 30305 32156 ...
## $ 1977 : num 14003408 2508026 17068212 30696 33279 ...
## $ 1978 : num 14179656 2562121 17624756 31139 34432 ...
## $ 1979 : num 14249493 2616530 18205468 31727 35753 ...
## $ 1980 : num 14185729 2671300 18811199 32526 37328 ...
## $ 1981 : num 13984092 2725029 19442423 33557 39226 ...
## $ 1982 : num 13672870 2777592 20095648 34797 41390 ...
## $ 1983 : num 13300056 2831682 20762767 36203 43636 ...
## $ 1984 : num 12931791 2891004 21433070 37706 45702 ...
## $ 1985 : num 12625292 2957390 22098298 39253 47414 ...
## $ 1986 : num 12372113 3033393 22753511 40834 48653 ...
## $ 1987 : num 12183387 3116009 23398470 42446 49504 ...
## $ 1988 : num 12156685 3194854 24035237 44048 50236 ...
## $ 1989 : num 12414686 3255859 24668100 45595 51241 ...
## $ 1990 : num 13032161 3289483 25299182 47052 52773 ...
## $ 1991 : num 14069854 3291695 25930560 48402 54996 ...
## $ 1992 : num 15472076 3266983 26557969 49648 57767 ...
## $ 1993 : num 17053213 3224901 27169903 50801 60670 ...
## $ 1994 : num 18553819 3179442 27751086 51885 63111 ...
## $ 1995 : num 19789880 3141102 28291591 52919 64699 ...
## $ 1996 : num 20684982 3112597 28786855 53901 65227 ...
## $ 1997 : num 21299350 3091902 29242917 54834 64905 ...
## $ 1998 : num 21752257 3079037 29673694 55745 64246 ...
## $ 1999 : num 22227543 3072725 30099010 56667 63985 ...
## $ 2000 : num 22856302 3071856 30533827 57625 64634 ...
## $ 2001 : num 23677385 3077378 30982214 58633 66390 ...
## $ 2002 : num 24639841 3089778 31441848 59687 69043 ...
## $ 2003 : num 25678639 3106701 31913462 60774 72203 ...
## $ 2004 : num 26693486 3124861 32396048 61871 75292 ...
## $ 2005 : num 27614718 3141800 32888449 62962 77888 ...
## $ 2006 : num 28420974 3156607 33391954 64045 79874 ...
## $ 2007 : num 29145841 3169665 33906605 65130 81390 ...
## $ 2008 : num 29839994 3181397 34428028 66217 82577 ...
## $ 2009 : num 30577756 3192723 34950168 67312 83677 ...
## $ 2010 : num 31411743 3204284 35468208 68420 84864 ...
## $ 2011 : num 32358260 3215988 35980193 69543 86165 ...
## $ 2012 : num 33397058 3227373 36485828 70680 87518 ...
## $ 2013 : num 34499915 3238316 36983924 71834 88909 ...
# Reshape to long format: one row per country/year with the population in
# `count`. All columns except `country` are year columns, so they are
# selected by exclusion rather than by the hard-coded positions 2:65.
dfpop <- gather(dfpop, "year", "count", -country)
head(dfpop)
## country year count
## 1 Afghanistan 1950 8151455
## 2 Albania 1950 1215002
## 3 Algeria 1950 8752997
## 4 American Samoa 1950 18937
## 5 Andorra 1950 6197
## 6 Angola 1950 4147509
Now I plot the total population of the aforementioned countries and regions:
# Restrict the population table to the same five countries/regions as before.
selected <- filter(
  dfpop,
  country %in% c("United States", "Japan", "Canada", "China", "Hong Kong, China")
)
head(selected)
## country year count
## 1 Canada 1950 13736997
## 2 China 1950 550771433
## 3 Hong Kong, China 1950 1973998
## 4 Japan 1950 82199470
## 5 United States 1950 157813040
## 6 Canada 1951 14132558
# Total population over time, one line per country/region.
# `year` is text after gather(); it is parsed to a number for plotting so the
# 64 yearly values sit on a continuous axis instead of producing 64
# categorical tick labels. `selected` itself is left unchanged.
selected %>%
  mutate(year = as.integer(as.character(year))) %>%
  ggplot(aes(x = year, y = count)) +
  geom_line(aes(group = country, color = country)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
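All of these countries and regions have growing populations. This is not what I expected; I thought the total population might be dropping as well. Maybe life expectancy is longer now, so a larger part of the population has moved into older age groups. Note that the total population data only runs through 2013, whereas the age-share data includes projections up to 2050, so the two graphs do not cover the same period.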