This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
library(animation)
library(car)
library(corrplot)
library(datasets)
library(gapminder)
library(gcookbook)
library(geomnet)
library(GGally)
library(ggmap)
library(htmlwidgets)
library(leaflet)
library(manipulateWidget)
library(maps)
library(PerformanceAnalytics)
library(plotly)
library(radarchart)
library(RColorBrewer)
library(rmarkdown)
library(shiny)
library(shinydashboard)
library(tidyverse)
library(treemap)
library(WDI)
library(grid)
library(devtools)
library(gridExtra)
library(scales)
# Read the world indicators data.
# file.choose() opens an interactive file picker, so this step needs a user
# present to select the CSV.
# guess_max is raised so that column types are guessed from the whole file
# rather than only its first rows: in the recorded run `Business Tax Rate`,
# `Ease of Business` and `Hours to do Tax` were guessed as logical and readr
# reported 3023 parsing failures, the first of them at row 1041.
worldIndicators <- read_csv(file.choose(), guess_max = 10000)
Parsed with column specification:
cols(
.default = col_character(),
`Business Tax Rate` = col_logical(),
`CO2 Emissions` = col_double(),
LifeExpectancy = col_double(),
`Days to Start Business` = col_double(),
`Ease of Business` = col_logical(),
`Energy Usage` = col_double(),
`Hours to do Tax` = col_logical(),
`Lending Interest` = col_double(),
`Life Expectancy Female` = col_double(),
`Life Expectancy Male` = col_double(),
`Number of Records` = col_double(),
`Population 65+` = col_double()
)
See spec(...) for full column specifications.
3023 parsing failures.
row col expected actual file
1041 Business Tax Rate 1/0/T/F/TRUE/FALSE 0.769 'C:\My Data\World Indicators.csv'
1041 Hours to do Tax 1/0/T/F/TRUE/FALSE 451 'C:\My Data\World Indicators.csv'
1042 Business Tax Rate 1/0/T/F/TRUE/FALSE 0.521 'C:\My Data\World Indicators.csv'
1042 Hours to do Tax 1/0/T/F/TRUE/FALSE 272 'C:\My Data\World Indicators.csv'
1043 Business Tax Rate 1/0/T/F/TRUE/FALSE 0.757 'C:\My Data\World Indicators.csv'
.... ................. .................. ...... ...................................
See problems(...) for more details.
# Inspect the imported data: open it in the data viewer, then print its
# structure (column names, types and leading values).
view(worldIndicators)
str(worldIndicators)
Classes ‘spec_tbl_df’, ‘tbl_df’, ‘tbl’ and 'data.frame': 2704 obs. of 33 variables:
$ BirthRate : chr "2.00%" "5.00%" "4.30%" "2.70%" ...
$ Business Tax Rate : logi NA NA NA NA NA NA ...
$ CO2 Emissions : num 87931 9542 1617 4276 1041 ...
$ Above Threshold? : chr "Below Threshold" "Below Threshold" "Below Threshold" "Below Threshold" ...
$ GDP per Capita : chr "$1,727" "$656" "$339" "$3,297" ...
$ Birth Rate Bin : chr "1.5-3%" "Above 3%" "Above 3%" "1.5-3%" ...
$ LifeExpectancy : num 69 46 55 51 51 48 52 44 47 58 ...
$ Ease of Business (clusters): chr "Low" "Low" "Low" "Friendly" ...
$ Country : chr "Algeria" "Angola" "Benin" "Botswana" ...
$ Days to Start Business : num NA NA NA NA NA NA NA NA NA NA ...
$ Ease of Business : logi NA NA NA NA NA NA ...
$ Energy Usage : num 26998 7499 1983 1836 NA ...
$ GDP per Capita (bin) : chr "$0" "$0" "$0" "$0" ...
$ GDP : chr "$55B" "$9B" "$2B" "$6B" ...
$ Health Exp % GDP : chr "3.50%" "3.40%" "4.30%" "4.70%" ...
$ Health Exp/Capita : chr "$60" "$22" "$15" "$152" ...
$ Hours to do Tax : logi NA NA NA NA NA NA ...
$ InfantMortalityRate : chr "3.40%" "12.80%" "9.00%" "5.40%" ...
$ Internet Usage : chr "1%" "0%" "0%" "3%" ...
$ Lending Interest : num 0.1 1.032 NA 0.155 NA ...
$ Life Expectancy Female : num 71 47 57 52 52 49 53 45 48 59 ...
$ Life Expectancy Male : num 67 44 53 49 49 47 51 42 46 56 ...
$ Mobile Phone Usage : chr "0%" "0%" "1%" "13%" ...
$ Number of Records : num 1 1 1 1 1 1 1 1 1 1 ...
$ Population 0-14 : chr "34%" "48%" "45%" "38%" ...
$ Population 15-64 : chr "62%" "50%" "52%" "59%" ...
$ Population 65+ : num 0.039 0.025 0.029 0.029 0.028 0.029 0.034 0.04 0.028 0.031 ...
$ Population Total : chr "31.7M" "13.9M" "6.9M" "1.8M" ...
$ Population Urban : chr "59.90%" "32.40%" "38.30%" "53.20%" ...
$ Region : chr "Africa" "Africa" "Africa" "Africa" ...
$ Tourism Inbound : chr "$0B" "$0B" "$0B" "$0B" ...
$ Tourism Outbound : chr "$0B" "$0B" "$0B" "$0B" ...
$ Year : chr "12/1/2000" "12/1/2000" "12/1/2000" "12/1/2000" ...
- attr(*, "problems")=Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 3023 obs. of 5 variables:
..$ row : int 1041 1041 1042 1042 1043 1043 1044 1044 1045 1045 ...
..$ col : chr "Business Tax Rate" "Hours to do Tax" "Business Tax Rate" "Hours to do Tax" ...
..$ expected: chr "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" ...
..$ actual : chr "0.769" "451" "0.521" "272" ...
..$ file : chr "'C:\\My Data\\World Indicators.csv'" "'C:\\My Data\\World Indicators.csv'" "'C:\\My Data\\World Indicators.csv'" "'C:\\My Data\\World Indicators.csv'" ...
- attr(*, "spec")=
.. cols(
.. BirthRate = col_character(),
.. `Business Tax Rate` = col_logical(),
.. `CO2 Emissions` = col_double(),
.. `Above Threshold?` = col_character(),
.. `GDP per Capita` = col_character(),
.. `Birth Rate Bin` = col_character(),
.. LifeExpectancy = col_double(),
.. `Ease of Business (clusters)` = col_character(),
.. Country = col_character(),
.. `Days to Start Business` = col_double(),
.. `Ease of Business` = col_logical(),
.. `Energy Usage` = col_double(),
.. `GDP per Capita (bin)` = col_character(),
.. GDP = col_character(),
.. `Health Exp % GDP` = col_character(),
.. `Health Exp/Capita` = col_character(),
.. `Hours to do Tax` = col_logical(),
.. InfantMortalityRate = col_character(),
.. `Internet Usage` = col_character(),
.. `Lending Interest` = col_double(),
.. `Life Expectancy Female` = col_double(),
.. `Life Expectancy Male` = col_double(),
.. `Mobile Phone Usage` = col_character(),
.. `Number of Records` = col_double(),
.. `Population 0-14` = col_character(),
.. `Population 15-64` = col_character(),
.. `Population 65+` = col_double(),
.. `Population Total` = col_character(),
.. `Population Urban` = col_character(),
.. Region = col_character(),
.. `Tourism Inbound` = col_character(),
.. `Tourism Outbound` = col_character(),
.. Year = col_character()
.. )
# Data Wrangling and Cleaning
# Keep only the columns used further down, then restrict to observations dated
# strictly after 12/1/2003 and strictly before 12/1/2011.
# Year is stored as "m/d/Y" text, so it is parsed to a Date for the comparison;
# comparing the raw strings would order them character by character rather
# than chronologically. The Year column itself is left as text.
worldIndicators2 <- worldIndicators %>%
  select(BirthRate, LifeExpectancy, InfantMortalityRate, Region, Year) %>%
  filter(
    as.Date(Year, format = "%m/%d/%Y") > as.Date("2003-12-01"),
    as.Date(Year, format = "%m/%d/%Y") < as.Date("2011-12-01")
  )
glimpse(worldIndicators2)
Observations: 1,456
Variables: 5
$ BirthRate <chr> "2.00%", "5.00%", "4.10%", "2.60%", "4.50%", "4.30%", "4.10%", "3.80%", "5.00%", "3.90%", "4.60%", "3.90%", "3.70%", "2.90%",...
$ LifeExpectancy <dbl> 70, 48, 57, 48, 52, 50, 52, 45, 48, 59, 48, 54, 47, 58, 70, 49, 58, 56, 60, 57, 58, 52, 52, 54, 44, 54, 74, 61, 48, 51, 61, 7...
$ InfantMortalityRate <chr> "3.00%", "12.30%", "7.80%", "4.60%", "8.90%", "7.90%", "8.10%", "11.10%", "10.20%", "7.10%", "10.80%", "6.60%", "9.10%", "7.3...
$ Region <chr> "Africa", "Africa", "Africa", "Africa", "Africa", "Africa", "Africa", "Africa", "Africa", "Africa", "Africa", "Africa", "Afri...
$ Year <chr> "12/1/2004", "12/1/2004", "12/1/2004", "12/1/2004", "12/1/2004", "12/1/2004", "12/1/2004", "12/1/2004", "12/1/2004", "12/1/20...
# Convert the text columns into types that can be averaged and plotted.
# New columns added to worldIndicators2:
#   birth - BirthRate as a number, "%" sign removed (e.g. "2.00%" -> 2)
#   im    - InfantMortalityRate as a number, "%" sign removed
#   life  - LifeExpectancy as a number
#   Yr    - Year ("m/d/Y" text) parsed to a Date
# Region is converted to a factor in place.
# parse_number() extracts the numeric part of each string, so a value without
# a trailing "%" is not truncated the way dropping the last character would.
worldIndicators2 <- worldIndicators2 %>%
  mutate(
    birth = parse_number(BirthRate),
    im = parse_number(InfantMortalityRate),
    life = as.numeric(LifeExpectancy),
    # Use the as.Date() generic rather than calling the S3 method directly.
    Yr = as.Date(Year, format = "%m/%d/%Y"),
    Region = as.factor(Region)
  )
view(worldIndicators2)
# Now summarize average rates by region and date.
# Produces one row per Region/Yr combination with the mean birth rate, life
# expectancy and infant mortality rate, ignoring missing values.
worldIndicators3 <- worldIndicators2 %>%
  group_by(Region, Yr) %>%
  summarize(
    birthrate = mean(birth, na.rm = TRUE),
    life_exp = mean(life, na.rm = TRUE),
    infant = mean(im, na.rm = TRUE)
  )
# Yr is already a Date, so the four-digit calendar year can be formatted out
# of it directly; Yr1 holds that year as a number for use on the plot x axes.
worldIndicators3$Yr1 <- as.numeric(format(worldIndicators3$Yr, "%Y"))
worldIndicators3
str(worldIndicators3)
Classes ‘grouped_df’, ‘tbl_df’, ‘tbl’ and 'data.frame': 42 obs. of 6 variables:
$ Region : Factor w/ 6 levels "Africa","Asia",..: 1 1 1 1 1 1 1 2 2 2 ...
$ Yr : Date, format: "2004-12-01" "2005-12-01" "2006-12-01" "2007-12-01" ...
$ birthrate: num 3.69 3.67 3.65 3.62 3.6 ...
$ life_exp : num 55.1 55.5 56 56.6 57.2 ...
$ infant : num 7.12 6.89 6.66 6.43 6.23 ...
$ Yr1 : num 2004 2005 2006 2007 2008 ...
- attr(*, "problems")=Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 3023 obs. of 5 variables:
..$ row : int 1041 1041 1042 1042 1043 1043 1044 1044 1045 1045 ...
..$ col : chr "Business Tax Rate" "Hours to do Tax" "Business Tax Rate" "Hours to do Tax" ...
..$ expected: chr "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" ...
..$ actual : chr "0.769" "451" "0.521" "272" ...
..$ file : chr "'C:\\My Data\\World Indicators.csv'" "'C:\\My Data\\World Indicators.csv'" "'C:\\My Data\\World Indicators.csv'" "'C:\\My Data\\World Indicators.csv'" ...
- attr(*, "spec")=
.. cols(
.. BirthRate = col_character(),
.. `Business Tax Rate` = col_logical(),
.. `CO2 Emissions` = col_double(),
.. `Above Threshold?` = col_character(),
.. `GDP per Capita` = col_character(),
.. `Birth Rate Bin` = col_character(),
.. LifeExpectancy = col_double(),
.. `Ease of Business (clusters)` = col_character(),
.. Country = col_character(),
.. `Days to Start Business` = col_double(),
.. `Ease of Business` = col_logical(),
.. `Energy Usage` = col_double(),
.. `GDP per Capita (bin)` = col_character(),
.. GDP = col_character(),
.. `Health Exp % GDP` = col_character(),
.. `Health Exp/Capita` = col_character(),
.. `Hours to do Tax` = col_logical(),
.. InfantMortalityRate = col_character(),
.. `Internet Usage` = col_character(),
.. `Lending Interest` = col_double(),
.. `Life Expectancy Female` = col_double(),
.. `Life Expectancy Male` = col_double(),
.. `Mobile Phone Usage` = col_character(),
.. `Number of Records` = col_double(),
.. `Population 0-14` = col_character(),
.. `Population 15-64` = col_character(),
.. `Population 65+` = col_double(),
.. `Population Total` = col_character(),
.. `Population Urban` = col_character(),
.. Region = col_character(),
.. `Tourism Inbound` = col_character(),
.. `Tourism Outbound` = col_character(),
.. Year = col_character()
.. )
- attr(*, "groups")=Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 6 obs. of 2 variables:
..$ Region: Factor w/ 6 levels "Africa","Asia",..: 1 2 3 4 5 6
..$ .rows :List of 6
.. ..$ : int 1 2 3 4 5 6 7
.. ..$ : int 8 9 10 11 12 13 14
.. ..$ : int 15 16 17 18 19 20 21
.. ..$ : int 22 23 24 25 26 27 28
.. ..$ : int 29 30 31 32 33 34 35
.. ..$ : int 36 37 38 39 40 41 42
..- attr(*, ".drop")= logi TRUE
# Birth Rate
# Line chart of the regional average birth rate against year, one facet column
# per Region. The x axis title, labels and ticks are blanked and the legend is
# hidden; the y axis is fixed to 0-4 with a break every 0.5.
p1 <- ggplot(
  data = worldIndicators3,
  mapping = aes(x = Yr1, y = birthrate, colour = Region)
) +
  geom_line(size = 1.5) +
  facet_grid(. ~ Region) +
  scale_y_continuous(
    breaks = seq(0.5, 3.5, by = 0.5),
    limits = c(0, 4)
  ) +
  ylab("Avg. Birth Rate(%)") +
  theme(
    legend.position = "none",
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )
# Infant Mortality Rate
# Line chart of the regional average infant mortality rate against year, one
# facet column per Region. The facet strip labels, the x axis title, labels
# and ticks are all blanked and the legend is hidden; the y axis is fixed to
# 0-7.5 with a break at every whole number from 1 to 7.
p2 <- ggplot(
  data = worldIndicators3,
  mapping = aes(x = Yr1, y = infant, colour = Region)
) +
  geom_line(size = 1.5) +
  facet_grid(. ~ Region) +
  scale_y_continuous(
    breaks = seq(1, 7, by = 1),
    limits = c(0, 7.5)
  ) +
  ylab("Avg. Infant Mortality Rate") +
  theme(
    legend.position = "none",
    strip.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )
# Life Expectancy Rate
# Bar chart of the regional average life expectancy per year, one facet column
# per Region. Facet strip labels and the legend are hidden, and the year
# labels on the x axis are rotated 90 degrees.
p3 <- ggplot(
  worldIndicators3,
  aes(x = Yr1, y = life_exp, fill = Region)
) +
  # geom_col() draws bars whose heights are the y values themselves.
  geom_col() +
  facet_grid(. ~ Region) +
  theme(
    strip.text.x = element_blank(),
    legend.position = "none",
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.title.x = element_blank()
  ) +
  # Yr1 repeats every year once per region; pass each year as a break only once.
  scale_x_continuous(breaks = sort(unique(worldIndicators3$Yr1))) +
  labs(y = "Avg. Life Expectancy (years)")
# Draw the three plots (p1, p2, p3) together on one page under a shared title.
# The title string contains a literal line break, so "Region" is rendered on
# its own second line of the title.
grid.arrange(p1, p2, p3, top = "Avg. Birth Rate, Avg. Infant Mortality Rate & Avg. Life Expectancy
Region")