library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(dplyr)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the UN indicator CSV (originally downloaded from http://data.un.org/).
# `skip = 1` drops the first line of the file, so the header row is read from
# the second line.
original_file <- read.csv(
  file = "https://raw.githubusercontent.com/gpadmaperuma/DATA606/master/SYB62_T03_201907_Population%20Growth%2C%20Fertility%20and%20Mortality%20Indicators.csv",
  header = TRUE,
  skip = 1
)
# Render the first six rows as a formatted table.
kable(head(original_file))
Region.Country.Area | X | Year | Series | Value | Footnotes | Source |
---|---|---|---|---|---|---|
1 | Total, all countries or areas | 2005 | Population annual rate of increase (percent) | 1.2570 | Data refers to a 5-year period preceding the reference year. | United Nations Population Division, New York, World Population Prospects: The 2019 Revision, last accessed June 2019. |
1 | Total, all countries or areas | 2005 | Total fertility rate (children per women) | 2.6513 | Data refers to a 5-year period preceding the reference year. | United Nations Population Division, New York, World Population Prospects: The 2019 Revision; supplemented by data from the United Nations Statistics Division, New York, Demographic Yearbook 2015 and Secretariat for the Pacific Community (SPC) for small countries or areas, last accessed June 2019. |
1 | Total, all countries or areas | 2005 | Infant mortality for both sexes (per 1,000 live births) | 49.2161 | Data refers to a 5-year period preceding the reference year. | United Nations Statistics Division, New York, “Demographic Yearbook 2015” and the demographic statistics database, last accessed June 2017. |
1 | Total, all countries or areas | 2005 | Maternal mortality ratio (deaths per 100,000 population) | 288.0000 | World Health Organization (WHO), the United Nations Children’s Fund (UNICEF), the United Nations Population Fund (UNFPA), the World Bank and the United Nations Population Division, “Trends in Maternal Mortality 1990 - 2015.” | |
1 | Total, all countries or areas | 2005 | Life expectancy at birth for both sexes (years) | 67.0455 | Data refers to a 5-year period preceding the reference year. | United Nations Population Division, New York, World Population Prospects: The 2019 Revision; supplemented by data from the United Nations Statistics Division, New York, Demographic Yearbook 2015 and Secretariat for the Pacific Community (SPC) for small countries or areas, last accessed June 2019. |
1 | Total, all countries or areas | 2005 | Life expectancy at birth for males (years) | 64.8082 | Data refers to a 5-year period preceding the reference year. | United Nations Population Division, New York, World Population Prospects: The 2019 Revision; supplemented by data from the United Nations Statistics Division, New York, Demographic Yearbook 2015 and Secretariat for the Pacific Community (SPC) for small countries or areas, last accessed June 2019. |
# Report the number of rows and columns in the raw data.
dim(original_file)
## [1] 4984 7
While analyzing these data, I will try to answer two questions:
(1) Which country/region/area has the highest life expectancy? (2) Has life expectancy increased or decreased in these countries/regions/areas over time?
There are 4984 cases in this dataset. Each case represents the value of one population growth, fertility, or mortality indicator for a given country/region/area and year.
These data were obtained from the United Nations database called UNdata: A world of information.
UNdata is a web-based data service for the global user community. These data are maintained by the Statistics Division of the Department of Economic and Social Affairs (UN DESA) of the UN Secretariat. Most of the data are sourced from UN partner organizations such as UNICEF, UNDP, UNHCR, WHO, etc.
These data are obtained as part of UN research efforts to solve world economic, health, and other problems. They are observational data collected through UN research in those countries or regions.
UNdata: A world of information
The United Nations, Population Growth Fertility Mortality Indicators (2019). Retrieved from (http://data.un.org/)
The response variable for this dataset is Value, which is a quantitative variable. It holds all the population, fertility, and mortality rates.
The two qualitative independent variables are Region/Country/Area and Series, and the one quantitative independent variable is Year, the year in which the data were collected.
Summary statistics for each of the variables and appropriate visualizations:
# Per-column summaries of the raw data (quantiles for numeric columns, level
# counts for factor columns).
summary(original_file)
## Region.Country.Area X Year
## Min. : 1.0 Afghanistan: 21 Min. :2000
## 1st Qu.:152.0 Albania : 21 1st Qu.:2005
## Median :388.0 Algeria : 21 Median :2010
## Mean :393.4 Angola : 21 Mean :2010
## 3rd Qu.:624.0 Argentina : 21 3rd Qu.:2015
## Max. :894.0 Armenia : 21 Max. :2018
## (Other) :4858
## Series
## Infant mortality for both sexes (per 1,000 live births) :702
## Life expectancy at birth for both sexes (years) :705
## Life expectancy at birth for females (years) :735
## Life expectancy at birth for males (years) :735
## Maternal mortality ratio (deaths per 100,000 population):573
## Population annual rate of increase (percent) :799
## Total fertility rate (children per women) :735
## Value
## Min. : -4.978
## 1st Qu.: 3.074
## Median : 52.536
## Mean : 57.959
## 3rd Qu.: 73.586
## Max. :1986.136
##
## Footnotes
## Data refers to a 5-year period preceding the reference year. :3835
## : 659
## Data refers to a 5-year period preceding the reference year.;For statistical purposes, the data for China do not include those for the Hong Kong Special Administrative Region (Hong Kong SAR), Macao Special Administrative Region (Macao SAR) and Taiwan Province of China.: 18
## Data refers to a 5-year period preceding the reference year.;Including Abkhazia and South Ossetia. : 18
## Data refers to a 5-year period preceding the reference year.;Including Agalega, Rodrigues and Saint Brandon. : 18
## Data refers to a 5-year period preceding the reference year.;Including Åland Islands. : 18
## (Other) : 418
## Source
## United Nations Population Division, New York, World Population Prospects: The 2019 Revision, last accessed June 2019. : 799
## United Nations Population Division, New York, World Population Prospects: The 2019 Revision; supplemented by data from the United Nations Statistics Division, New York, Demographic Yearbook 2015 and Secretariat for the Pacific Community (SPC) for small countries or areas, last accessed June 2019.:2910
## United Nations Statistics Division, New York, "Demographic Yearbook 2015" and the demographic statistics database, last accessed June 2017. : 702
## World Health Organization (WHO), the United Nations Children's Fund (UNICEF), the United Nations Population Fund (UNFPA), the World Bank and the United Nations Population Division, "Trends in Maternal Mortality 1990 - 2015." : 573
##
##
##
# Descriptive statistics (n, mean, sd, median, skew, kurtosis, ...) for every
# column, using describe() from the psych package.
describe(original_file)
## vars n mean sd median trimmed mad
## Region.Country.Area 1 4984 393.39 264.59 388.00 385.88 349.89
## X* 2 4984 133.17 76.52 132.00 133.08 99.33
## Year 3 4984 2009.95 4.12 2010.00 2009.97 7.41
## Series* 4 4984 4.03 2.02 4.00 4.03 2.97
## Value 5 4984 57.96 108.95 52.54 41.12 43.83
## Footnotes* 6 4984 10.95 6.20 11.00 10.55 0.00
## Source* 7 4984 2.21 0.85 2.00 2.14 0.00
## min max range skew kurtosis se
## Region.Country.Area 1.00 894.00 893.00 0.16 -1.23 3.75
## X* 1.00 265.00 264.00 0.01 -1.20 1.08
## Year 2000.00 2018.00 18.00 -0.03 -1.42 0.06
## Series* 1.00 7.00 6.00 0.00 -1.28 0.03
## Value -4.98 1986.14 1991.11 6.45 58.89 1.54
## Footnotes* 1.00 42.00 41.00 1.70 6.19 0.09
## Source* 1.00 4.00 3.00 0.72 0.07 0.01
# Print the raw data as a tibble for a compact preview that shows column types.
as_tibble(original_file)
## # A tibble: 4,984 x 7
## Region.Country.A~ X Year Series Value Footnotes Source
## <int> <fct> <int> <fct> <dbl> <fct> <fct>
## 1 1 Total,~ 2005 Populati~ 1.26 Data refers~ United Na~
## 2 1 Total,~ 2005 Total fe~ 2.65 Data refers~ United Na~
## 3 1 Total,~ 2005 Infant m~ 49.2 Data refers~ "United N~
## 4 1 Total,~ 2005 Maternal~ 288 "" "World He~
## 5 1 Total,~ 2005 Life exp~ 67.0 Data refers~ United Na~
## 6 1 Total,~ 2005 Life exp~ 64.8 Data refers~ United Na~
## 7 1 Total,~ 2005 Life exp~ 69.4 Data refers~ United Na~
## 8 1 Total,~ 2010 Populati~ 1.23 Data refers~ United Na~
## 9 1 Total,~ 2010 Total fe~ 2.58 Data refers~ United Na~
## 10 1 Total,~ 2010 Infant m~ 41.0 Data refers~ "United N~
## # ... with 4,974 more rows
The original data include values for both regions and countries. I will create two subsets, one for regions and one for countries. This makes it easier to visualize the data in a more organized manner.
# Keep only the columns needed for the analysis by dropping the numeric area
# code, the footnotes and the source citation; store the result separately so
# the raw data stays untouched.
UN_PopulationGrowth <- original_file %>%
  select(-c("Region.Country.Area", "Footnotes", "Source"))
head(UN_PopulationGrowth)
## X Year
## 1 Total, all countries or areas 2005
## 2 Total, all countries or areas 2005
## 3 Total, all countries or areas 2005
## 4 Total, all countries or areas 2005
## 5 Total, all countries or areas 2005
## 6 Total, all countries or areas 2005
## Series Value
## 1 Population annual rate of increase (percent) 1.2570
## 2 Total fertility rate (children per women) 2.6513
## 3 Infant mortality for both sexes (per 1,000 live births) 49.2161
## 4 Maternal mortality ratio (deaths per 100,000 population) 288.0000
## 5 Life expectancy at birth for both sexes (years) 67.0455
## 6 Life expectancy at birth for males (years) 64.8082
# Take rows 22 through 564 of the trimmed data as the region-level subset.
# NOTE(review): the row range is hard-coded and depends on the row order of the
# source file -- confirm it still matches if the CSV changes.
Population_Region <- slice(UN_PopulationGrowth, 22:564)
# Give the name column ("X") a meaningful label.
names(Population_Region) <- replace(
  names(Population_Region),
  names(Population_Region) == "X",
  "Region"
)
head(Population_Region)
## Region Year Series
## 1 Africa 2005 Population annual rate of increase (percent)
## 2 Africa 2005 Total fertility rate (children per women)
## 3 Africa 2005 Infant mortality for both sexes (per 1,000 live births)
## 4 Africa 2005 Life expectancy at birth for both sexes (years)
## 5 Africa 2005 Life expectancy at birth for males (years)
## 6 Africa 2005 Life expectancy at birth for females (years)
## Value
## 1 2.4390
## 2 5.0771
## 3 81.0492
## 4 53.5269
## 5 51.9582
## 6 55.1343
# Reshape to wide form: one row per Region/Year, one column per Series.
by_Region <- spread(Population_Region, key = Series, value = Value)
# Replace the long series descriptions with short, syntactic column names.
# Names that are not listed here are left unchanged.
names(by_Region) <- dplyr::recode(
  names(by_Region),
  "Infant mortality for both sexes (per 1,000 live births)" = "Infant_Mortality",
  "Life expectancy at birth for both sexes (years)" = "Life_Expectancy",
  "Maternal mortality ratio (deaths per 100,000 population)" = "Maternal_mortality_ratio",
  "Life expectancy at birth for males (years)" = "LifeExpectancy_males",
  "Life expectancy at birth for females (years)" = "LifeExpectancy_females",
  "Population annual rate of increase (percent)" = "Population_increase_rate",
  "Total fertility rate (children per women)" = "Total_fertility_rate"
)
head(by_Region)
## Region Year Infant_Mortality Life_Expectancy LifeExpectancy_females
## 1 Africa 2005 81.0492 53.5269 55.1343
## 2 Africa 2010 67.7143 56.7825 58.3389
## 3 Africa 2015 55.9325 60.2471 61.9302
## 4 Asia 2005 45.8017 68.3315 70.1224
## 5 Asia 2010 37.1114 70.0293 71.9654
## 6 Asia 2015 29.5012 71.8300 74.0127
## LifeExpectancy_males Maternal_mortality_ratio Population_increase_rate
## 1 51.9582 NA 2.439
## 2 55.2459 NA 2.522
## 3 58.5824 NA 2.581
## 4 66.6483 NA 1.227
## 5 68.2175 NA 1.132
## 6 69.8000 NA 1.036
## Total_fertility_rate
## 1 5.0771
## 2 4.9000
## 3 4.7301
## 4 2.4467
## 5 2.3281
## 6 2.2098
Using some of the data, such as the infant mortality rate and life expectancy for both sexes, males, and females, I am creating some interactive scatter plots to better understand these populations around the world. All the data are for the years 2005, 2010, and 2015.
# Infant mortality rate by region: one point per Region/Year, coloured by
# region, with Year passed through the `text` aesthetic for the hover tooltip.
g <- ggplot(by_Region, aes(x = Infant_Mortality, y = Region, text = Year)) +
  geom_point(aes(color = Region))
# Render the plot; without this call `g` is overwritten by the next plot and
# the infant-mortality chart never appears.
ggplotly(g)
# Life expectancy (both sexes) by region: one point per Region/Year, coloured
# by region, with Year available in the hover tooltip.
g <- by_Region %>%
  ggplot(mapping = aes(x = Life_Expectancy, y = Region, text = Year)) +
  geom_point(mapping = aes(color = Region))
# Convert the static ggplot into an interactive plotly widget.
ggplotly(g)
# Male life expectancy by region: one point per Region/Year, coloured by
# region, with Year available in the hover tooltip.
g <- by_Region %>%
  ggplot(mapping = aes(x = LifeExpectancy_males, y = Region, text = Year)) +
  geom_point(mapping = aes(color = Region))
# Convert the static ggplot into an interactive plotly widget.
ggplotly(g)
# Default figure size for knitr chunks.
# NOTE(review): opts_chunk$set() normally takes effect for later chunks only --
# confirm this is the intended placement.
knitr::opts_chunk$set(fig.width = 12, fig.height = 8)
# Female life expectancy by region: one point per Region/Year, coloured by
# region, with Year available in the hover tooltip.
g <- by_Region %>%
  ggplot(mapping = aes(x = LifeExpectancy_females, y = Region, text = Year)) +
  geom_point(mapping = aes(color = Region))
# Convert the static ggplot into an interactive plotly widget.
ggplotly(g)
# Horizontal grouped bar chart of life expectancy (both sexes) per region, with
# one bar per Year and the rounded value printed next to each bar.
g <- ggplot(by_Region, aes(x = Region, y = Life_Expectancy, fill = as.character(Year))) +
  # geom_col() draws the y values as given (same as geom_bar(stat = "identity")).
  geom_col(position = position_dodge(width = 0.9)) +
  # The label uses the bare column name so it is evaluated inside the plot data;
  # `by_Region$...` inside aes() bypasses that and is discouraged by ggplot2.
  # The dodge width matches the bars so each label lines up with its own bar.
  geom_text(
    aes(label = round(Life_Expectancy, 0)),
    hjust = -0.5,
    color = "black",
    position = position_dodge(width = 0.9),
    size = 2
  ) +
  scale_fill_brewer(palette = "Paired") +
  theme(
    axis.text.x = element_text(angle = 0, vjust = 1),
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom"
  ) +
  ggtitle("Life Expectancy by Region") +
  xlab("Regions") + ylab("Age in Years") +
  # Flip the axes so regions run down the side and bars extend horizontally.
  coord_flip()
# Convert the static ggplot into an interactive plotly widget.
ggplotly(g)