---
title: "Assignment 2"
output:
  html_document:
    df_print: paged
author: "Peihan Tian, Jie Tang"
---

Phase 1

setwd("/Users/tjmask/Desktop/Courses/Information visualization/")
library(data.table)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## Load the indicator data, then keep only the five target countries and
## the three indicators of interest (plus the Country and Year keys).
data <- fread("World Indicators.csv")
target_country <- c("United States", "Brazil", "Russian Federation", "India", "China")
data_target <- data[
  Country %in% target_country,
  .(Country, Year, `Internet Usage`, `CO2 Emissions`, `Health Exp % GDP`)
]
head(data_target)
##               Country      Year Internet Usage CO2 Emissions
## 1:              China 12/1/2000             2%       3405180
## 2:              India 12/1/2000             1%       1186663
## 3: Russian Federation 12/1/2000             2%       1558112
## 4:             Brazil 12/1/2000             3%        327984
## 5:      United States 12/1/2000            43%       5713560
## 6:              China 12/1/2001             3%       3487566
##    Health Exp % GDP
## 1:             4.6%
## 2:             4.3%
## 3:             5.4%
## 4:             7.2%
## 5:            13.6%
## 6:             4.6%
## Reshape the selected indicators to tidy (long) form: one row per
## Country / Year / indicator, with rows containing missing values dropped.
library(tidyr)
data_target.tidy <- data_target %>%
  gather(key, Value, -Country, -Year) %>%
  na.omit()
## Year is stored as text such as "12/1/2000"; parse it into a Date.
data_target.tidy[["Year"]] <- as.Date(data_target.tidy[["Year"]], format = "%m/%d/%Y")
head(data_target.tidy)
##              Country       Year            key Value
## 1              China 2000-12-01 Internet Usage    2%
## 2              India 2000-12-01 Internet Usage    1%
## 3 Russian Federation 2000-12-01 Internet Usage    2%
## 4             Brazil 2000-12-01 Internet Usage    3%
## 5      United States 2000-12-01 Internet Usage   43%
## 6              China 2001-12-01 Internet Usage    3%
## Plot every indicator over time: one row per indicator, one column per country.
## After gather() the Value column is character (e.g. "43%", "3405180"), which
## ggplot would draw as a discrete axis with no numeric meaning. Strip the
## "%" / "," formatting and convert to numeric on a plotting copy first.
plot_data <- as.data.frame(data_target.tidy)
plot_data$Value <- as.numeric(gsub("[%,]", "", plot_data$Value))

p <- ggplot(plot_data, aes(x = Year, y = Value, color = key)) +
  # Lines and points show the recorded values; jitter would add random noise.
  geom_line() +
  geom_point() +
  # The indicators differ by orders of magnitude, so give each row its own y scale.
  facet_grid(key ~ Country, scales = "free_y", shrink = TRUE, as.table = TRUE) +
  labs(
    title = "Trends",
    subtitle = "(trends of 5 countries)",
    y = "Values", x = "Years"
  ) +
  # With a numeric y axis the tick labels are meaningful, so they are kept visible.
  theme(axis.text.x = element_text(angle = 90))
p

Phase 2

Step 1: library calls to load packages

library(tidyverse)
library(leaflet)
library(dplyr)
library(WDI)

Step 2: Call package WDI to retrieve most updated figures available.

| Tableau Name | WDI Series |
|---|---|
| Birth Rate | SP.DYN.CBRT.IN |
| Infant Mortality Rate | SP.DYN.IMRT.IN |
| Internet Usage | IT.NET.USER.ZS |
| Life Expectancy (Total) | SP.DYN.LE00.IN |
| Forest Area (% of land) | AG.LND.FRST.ZS |
| Mobile Phone Usage | IT.CEL.SETS.P2 |
| Population Total | SP.POP.TOTL |
| International Tourism receipts (current US$) | ST.INT.RCPT.CD |
| Import value index (2000=100) | TM.VAL.MRCH.XD.WD |
| Export value index (2000=100) | TX.VAL.MRCH.XD.WD |
# WDI series codes, one variable per indicator listed in the table above
birth   <- "SP.DYN.CBRT.IN"     # Birth Rate
infmort <- "SP.DYN.IMRT.IN"     # Infant Mortality Rate
net     <- "IT.NET.USER.ZS"     # Internet Usage
lifeexp <- "SP.DYN.LE00.IN"     # Life Expectancy (Total)
forest  <- "AG.LND.FRST.ZS"     # Forest Area (% of land)
mobile  <- "IT.CEL.SETS.P2"     # Mobile Phone Usage
pop     <- "SP.POP.TOTL"        # Population Total
tour    <- "ST.INT.RCPT.CD"     # International Tourism receipts (current US$)
import  <- "TM.VAL.MRCH.XD.WD"  # Import value index (2000=100)
export  <- "TX.VAL.MRCH.XD.WD"  # Export value index (2000=100)

# Collect the series codes into the vector passed to WDI()
indicators <- c(
  birth, infmort, net, lifeexp, forest,
  mobile, pop, tour, import, export
)

# Request all indicators for every country, 1998-2018, with the extra
# columns (extra = TRUE) that the later steps read longitude/latitude from.
countries <- WDI(
  country = "all",
  indicator = indicators,
  start = 1998,
  end = 2018,
  extra = TRUE
)

## Clean the downloaded data in one pipeline:
##   1. give the indicator columns short names for ease of reference,
##   2. convert the geocodes from factors into numerics (via character, so the
##      printed value is parsed rather than the factor's integer code),
##   3. remove groupings, which have no geocodes.
countries <- countries %>%
  rename(
    birth   = SP.DYN.CBRT.IN,
    infmort = SP.DYN.IMRT.IN,
    net     = IT.NET.USER.ZS,
    lifeexp = SP.DYN.LE00.IN,
    forest  = AG.LND.FRST.ZS,
    mobile  = IT.CEL.SETS.P2,
    pop     = SP.POP.TOTL,
    tour    = ST.INT.RCPT.CD,
    import  = TM.VAL.MRCH.XD.WD,
    export  = TX.VAL.MRCH.XD.WD
  ) %>%
  mutate(
    lng = as.numeric(as.character(longitude)),
    lat = as.numeric(as.character(latitude))
  ) %>%
  filter(!is.na(lng))

A Glimpse of the new dataframe

# Print a compact overview (column names, types, first values) of `countries`
glimpse(countries)
## Observations: 4,410
## Variables: 22
## $ iso2c     <chr> "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", …
## $ country   <chr> "Andorra", "Andorra", "Andorra", "Andorra", "Andorra",…
## $ year      <int> 2018, 2007, 2004, 2005, 2017, 1998, 1999, 2000, 2006, …
## $ birth     <dbl> NA, 10.100, 10.900, 10.700, NA, 11.900, 12.600, 11.300…
## $ infmort   <dbl> 2.7, 4.5, 5.1, 4.9, 2.8, 6.4, 6.2, 5.9, 4.7, 5.5, 5.3,…
## $ net       <dbl> NA, 70.870000, 26.837954, 37.605766, 91.567467, 6.8862…
## $ lifeexp   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ forest    <dbl> NA, 34.042553, 34.042553, 34.042553, NA, 34.042553, 34…
## $ mobile    <dbl> 107.28255, 76.80204, 76.55160, 81.85933, 104.33241, 22…
## $ pop       <dbl> 77006, 82684, 76244, 78867, 77001, 64142, 64370, 65390…
## $ tour      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ import    <dbl> 136.50668, 190.30053, 174.09246, 178.06349, 146.27331,…
## $ export    <dbl> 268.35043, 332.78037, 271.81148, 314.89205, 264.92993,…
## $ iso3c     <fct> AND, AND, AND, AND, AND, AND, AND, AND, AND, AND, AND,…
## $ region    <fct> Europe & Central Asia, Europe & Central Asia, Europe &…
## $ capital   <fct> Andorra la Vella, Andorra la Vella, Andorra la Vella, …
## $ longitude <fct> 1.5218, 1.5218, 1.5218, 1.5218, 1.5218, 1.5218, 1.5218…
## $ latitude  <fct> 42.5075, 42.5075, 42.5075, 42.5075, 42.5075, 42.5075, …
## $ income    <fct> High income, High income, High income, High income, Hi…
## $ lending   <fct> Not classified, Not classified, Not classified, Not cl…
## $ lng       <dbl> 1.5218, 1.5218, 1.5218, 1.5218, 1.5218, 1.5218, 1.5218…
## $ lat       <dbl> 42.5075, 42.5075, 42.5075, 42.5075, 42.5075, 42.5075, …

Plot from Phase 1

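The code below prepares the data for the two world maps: it filters `countries` to the years 1998 and 2017, keeps the country name, birth rate, year and coordinates, and drops rows with missing values.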

# Build the two map data sets: `newcountry` holds the birth rate in 1998 and
# `newcountry2` holds the birth rate in 2017.
# Columns are selected by name rather than by position so the selection stays
# correct if the column order or the number of indicators in `countries` changes.
birth_cols <- c("country", "birth", "year", "lng", "lat")

newcountry <- countries %>% filter(year == 1998)
# Drop countries with a missing birth rate or missing coordinates.
newcountry <- na.omit(newcountry[, birth_cols])

newcountry2 <- countries %>% filter(year == 2017)
newcountry2 <- na.omit(newcountry2[, birth_cols])

World map showing a variable in 1998

In this assignment I use circle markers of different colors to represent different birth rates. For example, a birth rate below 15 is classed as low, 15-30 as medium, 30-45 as high and 45-60 as very high.

# Birth rate in 1998
# Bucket the birth rate into the four bands described in the text:
# [0, 15) low, [15, 30) medium, [30, 45) high, [45, 60) very high.
newcountry$range <- cut(
  newcountry$birth,
  breaks = c(0, 15, 30, 45, 60), right = FALSE,
  labels = c("low", "medium", "high", "very high")
)

# Continuous red palette over a fixed 1-55 domain; the 2017 map uses the same
# domain, so colors are comparable between the two maps.
pal <- colorNumeric(palette = "Reds", domain = c(1:55), reverse = FALSE)

map_1998 <- leaflet::leaflet(data = newcountry) %>%
  leaflet::addProviderTiles("CartoDB") %>%
  addCircleMarkers(
    # Pass the coordinates explicitly instead of letting leaflet guess them.
    lng = ~lng, lat = ~lat,
    color = ~pal(birth), radius = 6,
    # Hover label: country name, its birth rate, and the band it falls in.
    label = ~paste0(country, ": birth rate ", birth, " (", range, ")")
  ) %>%
  addLegend(
    title = "Birth Rate 1998", pal = pal, values = c(1:55),
    position = "bottomright"
  )
map_1998

Countries with the highest birth rates are mostly located in Africa, and countries in Europe have relatively low birth rates. Countries in Asia such as China have medium birth rates. Most countries in North America also have medium birth rates. In conclusion, the more developed a country, the lower its birth rate, which means that families have to focus and spend more on each child. Perhaps that is the main reason many families do not want lots of children.

World map showing the same variable recently

# Birth rate in 2017
# Bucket the birth rate into the same four bands used for the 1998 map:
# [0, 15) low, [15, 30) medium, [30, 45) high, [45, 60) very high.
newcountry2$range <- cut(
  newcountry2$birth,
  breaks = c(0, 15, 30, 45, 60), right = FALSE,
  labels = c("low", "medium", "high", "very high")
)

# Same fixed 1-55 domain as the 1998 map, so colors are comparable across years.
pal <- colorNumeric(palette = "Reds", domain = c(1:55), reverse = FALSE)

map_2017 <- leaflet::leaflet(data = newcountry2) %>%
  leaflet::addProviderTiles("CartoDB") %>%
  addCircleMarkers(
    # Pass the coordinates explicitly instead of letting leaflet guess them.
    lng = ~lng, lat = ~lat,
    color = ~pal(birth), radius = 6,
    # Hover label: country name, its birth rate, and the band it falls in.
    label = ~paste0(country, ": birth rate ", birth, " (", range, ")")
  ) %>%
  addLegend(
    title = "Birth Rate 2017", pal = pal, values = c(1:55),
    position = "bottomright"
  )
map_2017