Week 4 Nations Assignment

Setup: Working Directory, Packages, and Data Import

# Session setup: set the working directory, attach the tidyverse, and load the
# nations data set.
# NOTE(review): the path below is absolute and user-specific, so it presumably
# only resolves on the original author's machine. Every file read or written
# later in this script uses a bare file name and therefore depends on this call.
setwd( "C:/Users/Jerome/Documents/Data_Science_110/Datasets")
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts -------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Read nations.csv from the working directory into a base data.frame.
nations <- read.csv("nations.csv")

Create New Variables

# Derive total GDP as per-capita GDP times population, then build a copy of the
# data with GDP rescaled to trillions. Rows with a missing input stay NA.
nations <- nations %>%
  mutate(gdp = gdp_percap * population)
nations_mod <- nations %>%
  mutate(gdp_trillion = gdp / 1e12)
# Inspect the column names and types of the result.
str(nations_mod)
## 'data.frame':    5275 obs. of  19 variables:
##  $ X.6               : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ X.5               : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ X.4               : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ X.3               : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ X.2               : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ X.1               : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ X                 : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ iso2c             : chr  "AD" "AD" "AD" "AD" ...
##  $ iso3c             : chr  "AND" "AND" "AND" "AND" ...
##  $ country           : chr  "Andorra" "Andorra" "Andorra" "Andorra" ...
##  $ year              : int  1996 1994 2003 1990 2009 2011 2004 2010 2001 2002 ...
##  $ gdp_percap        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ population        : num  64291 62707 74783 54511 85474 ...
##  $ birth_rate        : num  10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
##  $ neonat_mortal_rate: num  2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
##  $ region            : chr  "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
##  $ income            : chr  "High income" "High income" "High income" "High income" ...
##  $ gdp               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gdp_trillion      : num  NA NA NA NA NA NA NA NA NA NA ...
# Persist both data frames as CSV files in the working directory.
#
# nations.csv is the same file this script reads at the start, so it is written
# without row names. By default write.csv() prepends an unnamed row-number
# column, which read.csv() brings back as "X" on the next run. Each run
# therefore added one more junk index column (the X, X.1, ..., X.6 columns
# visible in the str() output).
# NOTE(review): this still overwrites the input file in place (now with the
# added gdp column); consider writing to a different file name instead.
write.csv(nations, file = "nations.csv", row.names = FALSE)
# Left at the write.csv() defaults: nothing in this script reads this file
# back, so its row-name column does not accumulate.
# NOTE(review): consider row.names = FALSE here too if no downstream reader
# relies on that first column.
write.csv(nations_mod, file = "nations_mod.csv")

Filter the data to select four countries (Armenia, Azerbaijan, Georgia, and Tajikistan)

# Keep only the rows whose ISO3 code is one of ARM, AZE, GEO or TJK
# (Armenia, Azerbaijan, Georgia, Tajikistan).
four_states <- nations_mod %>%
  filter(iso3c %in% c("ARM", "AZE", "GEO", "TJK"))
# Check the span of years covered by the subset.
summary(four_states[["year"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1990    1996    2002    2002    2008    2014
# Save the subset to the working directory.
write.csv(four_states, file = "four_states.csv")

Scatterplot of GDP for the Four Countries

##install.packages("ggplot2")
library(ggplot2)
# Scatterplot of GDP (in trillions) against year for the four selected
# countries, with one Set1 colour per ISO3 code on a dark theme.
# Disabled in the original and kept here for reference only:
#   geom_smooth(method="lm" , se = FALSE)   # per-country linear trend line
#   cex.main = .75                          # title-size tweak that was never enabled
plot1 <- ggplot(four_states, aes(x = year, y = gdp_trillion, colour = iso3c)) +
  geom_point() +
  labs(
    x = "Years",
    y = "GDP/Trillion",
    title = "Scatterplot of GDP/Trillion for the South Caucasus Countries & Tajikistan, 1990 - 2014"
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme_dark(base_size = 10)
# Print the plot.
plot1

Area Plot of GDP by Region

##write.csv(regions, "regions.csv")
##install.packages("ggplot2")
library(ggplot2)
# Group the data by region and year, then total GDP (in trillions) within each
# group, skipping missing values. The total is stored in a column named `sum`.
regions <- nations_mod %>%
  group_by(region, year)
regions_summarized <- regions %>%
  summarize(sum = sum(gdp_trillion, na.rm = TRUE))
## `summarise()` regrouping output by 'region' (override with `.groups` argument)
# Stacked area chart of summed GDP (in trillions) by year, one filled band per
# region, with thin white outlines separating the bands.
#
# Two fixes relative to the previous version:
#  * The white outline is now a fixed setting on geom_area(). Inside aes() the
#    string "white" was treated as data: it produced a spurious "white" legend
#    entry and the outline took a colour from the colour scale, not white.
#  * The Brewer palette is applied to `fill`, the aesthetic that is actually
#    mapped to region. scale_colour_brewer() never affected the band fills.
# NOTE(review): Set2 offers at most 8 colours; assumes the data has no more
# than 8 distinct regions -- confirm.
plot2 <- regions_summarized %>%
  ggplot(aes(x = year, y = sum, fill = region)) +
  geom_area(colour = "white") +
  xlab("Years") +
  ylab("GDP/Trillion") +
  ggtitle("Area Plot of World GDP/Trillion by WB Region 1990 - 2014") +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal(base_size = 10)
# Print the plot.
plot2

# Save the per-region, per-year totals to the working directory.
write.csv(regions_summarized, file = "regions_summarized.csv")