library(readr)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
##
## Attaching package: 'plotly'
# Switch the session to the Datasets folder, then load the nations table from
# that folder with readr.
# NOTE(review): the absolute, user-specific path passed to setwd() only
# resolves on the original author's machine — consider a project-relative
# path instead.
setwd(dir = "C:/Users/munis/Documents/Comm in Data Science/Datasets")
nations <- read_csv(file = "nations.csv")
## Parsed with column specification:
## cols(
## iso2c = col_character(),
## iso3c = col_character(),
## country = col_character(),
## year = col_double(),
## gdp_percap = col_double(),
## population = col_double(),
## birth_rate = col_double(),
## neonat_mortal_rate = col_double(),
## region = col_character(),
## income = col_character()
## )
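## NOTE: the column specification below (state-level crime columns) is not
## produced by the read_csv('nations.csv') call above; it appears to be
## leftover console output from reading a different dataset.
## Parsed with column specification: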
## cols(
## state = col_character(),
## murder = col_double(),
## forcible_rape = col_double(),
## robbery = col_double(),
## aggravated_assault = col_double(),
## burglary = col_double(),
## larceny_theft = col_double(),
## motor_vehicle_theft = col_double(),
## population = col_double()
## )
# Preview the first six rows of the freshly loaded table.
head(x = nations, n = 6L)
## # A tibble: 6 x 10
## iso2c iso3c country year gdp_percap population birth_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9
## 2 AD AND Andorra 1994 NA 62707 10.9
## 3 AD AND Andorra 2003 NA 74783 10.3
## 4 AD AND Andorra 1990 NA 54511 11.9
## 5 AD AND Andorra 2009 NA 85474 9.9
## 6 AD AND Andorra 2011 NA 82326 NA
## # ... with 3 more variables: neonat_mortal_rate <dbl>, region <chr>,
## # income <chr>
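# Earlier attempt, kept for reference: it filtered on `id`, which is not a
# column of `nations` (the country name is stored in `country`).
#nations <- nations %>%
# filter( id== "China" | id == "Germany" | id == "Japan" | id == "United States")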
# Keep only the four economies being compared, then add each country-year's
# total GDP (per-capita GDP times population) expressed in trillions.
nations2 <- nations %>%
  filter(country %in% c("China", "Germany", "Japan", "United States")) %>%
  mutate(GDP = gdp_percap * population / 1e12)

# Preview the first six rows of the filtered table.
head(x = nations2, n = 6L)
## # A tibble: 6 x 11
## iso2c iso3c country year gdp_percap population birth_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 CN CHN China 1992 1260. 1164970000 18.3
## 2 CN CHN China 2005 5053. 1303720000 12.4
## 3 CN CHN China 2000 2915. 1262645000 14.0
## 4 CN CHN China 1991 1091. 1150780000 19.7
## 5 CN CHN China 2013 12219. 1357380000 12.1
## 6 CN CHN China 1999 2650. 1252735000 14.6
## # ... with 4 more variables: neonat_mortal_rate <dbl>, region <chr>,
## # income <chr>, GDP <dbl>
# Base chart for the four-country comparison: year on x, GDP on y, one Set1
# colour per country. No geoms are stored in `p1`; they are added only when
# the chart is drawn below.
p1 <- ggplot(
  data = nations2,
  mapping = aes(x = year, y = GDP, colour = country)
) +
  labs(x = "year", y = "GDP ($trillion)") +
  scale_colour_brewer(palette = "Set1")

# Draw each country's series as a line with its points on top.
p1 +
  geom_line() +
  geom_point()

# Total GDP per region and year, in trillions.
# GDP is first reconstructed for every country-year as
# gdp_percap * population; summing gdp_percap directly would only add up
# per-capita figures, which is not the regional GDP that the
# "GDP ($trillion)" axis of the area chart describes.
# Country-years missing either input produce NA and are skipped by
# na.rm = TRUE. The summary column keeps the name `sum` because the plotting
# code maps `y = sum`. ungroup() drops the leftover `region` grouping so later
# verbs do not silently operate per region.
p2 <- nations %>%
  mutate(GDP = gdp_percap * population / 1e12) %>%
  group_by(region, year) %>%
  summarise(sum = sum(GDP, na.rm = TRUE)) %>%
  ungroup()
p2
## (Console output omitted: the table pasted here earlier listed sums of
## gdp_percap and does not match the values computed above.)
# Base chart for the regional totals held in `p2`: year on x, the `sum` column
# on y, one Set2 fill per region. No geoms are stored in `plot2` itself.
plot2 <- ggplot(
  data = p2,
  mapping = aes(x = year, y = sum, fill = region)
) +
  labs(x = "year", y = "GDP ($trillion)") +
  scale_fill_brewer(palette = "Set2")

# Draw the regions as filled areas outlined in white.
plot2 +
  geom_area(colour = "white")
