Week 4 Nations Assignment
Setup: working directory, packages, and data import
# NOTE(review): setwd() hard-codes a directory on one specific machine; every
# relative file path this script reads or writes resolves against it.
setwd( "C:/Users/Jerome/Documents/Data_Science_110/Datasets")
# Attach the tidyverse packages (ggplot2, dplyr, tidyr, readr, and others).
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts -------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the nations data from the working directory.
nations <- read.csv("nations.csv")
# The file on disk carries row-index columns left over from earlier saves;
# read.csv() names them X, X.1, X.2, ... and one more appears on every
# round-trip. Drop them so only the real variables remain.
nations <- nations[, !grepl("^X(\\.[0-9]+)?$", names(nations)), drop = FALSE]
Create New Variables
# Total GDP = GDP per capita times population (NA wherever either input is NA).
nations <- nations %>%
  mutate(gdp = gdp_percap * population)

# Same data plus GDP rescaled to trillions (1e12) for readable plot axes.
nations_mod <- nations %>%
  mutate(gdp_trillion = gdp / 1e12)

# Inspect column names and types of the result.
str(nations_mod)
## 'data.frame': 5275 obs. of 19 variables:
## $ X.6 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ X.5 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ X.4 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ X.3 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ X.2 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ X.1 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ iso2c : chr "AD" "AD" "AD" "AD" ...
## $ iso3c : chr "AND" "AND" "AND" "AND" ...
## $ country : chr "Andorra" "Andorra" "Andorra" "Andorra" ...
## $ year : int 1996 1994 2003 1990 2009 2011 2004 2010 2001 2002 ...
## $ gdp_percap : num NA NA NA NA NA NA NA NA NA NA ...
## $ population : num 64291 62707 74783 54511 85474 ...
## $ birth_rate : num 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
## $ neonat_mortal_rate: num 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
## $ region : chr "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
## $ income : chr "High income" "High income" "High income" "High income" ...
## $ gdp : num NA NA NA NA NA NA NA NA NA NA ...
## $ gdp_trillion : num NA NA NA NA NA NA NA NA NA NA ...
# Save both tables as CSV. row.names = FALSE keeps R's row index out of the
# files; with the default, each write adds an unnamed index column that
# read.csv() brings back as X, X.1, ... the next time the file is loaded.
# NOTE(review): the first call overwrites the input file "nations.csv" with the
# version that includes the derived gdp column -- confirm this is intended.
write.csv(nations, file = "nations.csv", row.names = FALSE)
write.csv(nations_mod, file = "nations_mod.csv", row.names = FALSE)
Filter the file to select 4 countries
# Keep only Armenia, Azerbaijan, Georgia and Tajikistan, matched by ISO3 code.
four_states <- nations_mod %>%
  filter(iso3c %in% c("ARM", "AZE", "GEO", "TJK"))

# Check the range of years covered by the subset.
summary(four_states$year)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1990 1996 2002 2002 2008 2014
# Save the four-country subset. row.names = FALSE stops write.csv() from
# adding R's row index as an extra unnamed first column.
write.csv(four_states, file = "four_states.csv", row.names = FALSE)
Attempt the Plot
##install.packages("ggplot2")
library(ggplot2)

# Scatterplot of yearly GDP (in trillions) for the four selected countries,
# one Set1 palette colour per ISO3 code, drawn on the dark theme.
# A per-country linear trend could be added with
# geom_smooth(method = "lm", se = FALSE); it is left out here.
plot1 <- ggplot(
  data = four_states,
  mapping = aes(x = year, y = gdp_trillion, colour = iso3c)
) +
  geom_point() +
  labs(
    x = "Years",
    y = "GDP/Trillion",
    title = "Scatterplot of GDP/Trillion for the South Caucasus Countries & Tajikistan, 1990 - 2014"
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme_dark(base_size = 10)

# Render the plot.
plot1

Now do the area plot
##write.csv(regions, "regions.csv")
##install.packages("ggplot2")
library(ggplot2)

# Total GDP (in trillions) per region and year. na.rm = TRUE skips missing
# values so one country without GDP data does not turn a whole total into NA.
# .groups = "drop" returns an ungrouped table and silences the regrouping
# message.
regions <- group_by(nations_mod, region, year)
regions_summarized <- summarize(
  regions,
  sum = sum(gdp_trillion, na.rm = TRUE),
  .groups = "drop"
)

# Stacked area chart of regional GDP over time.
# The white outline is a fixed setting, so it belongs in geom_area() rather
# than aes(): inside aes() the string "white" is treated as a data value,
# which adds a spurious legend entry and does not draw white lines.
# The regions are mapped to `fill`, so the Set2 palette has to be applied with
# scale_fill_brewer(); scale_colour_brewer() would leave the fills on ggplot2's
# default colours.
plot2 <- regions_summarized %>%
  ggplot(aes(year, sum, fill = region)) +
  geom_area(color = "white") +
  xlab("Years") +
  ylab("GDP/Trillion") +
  ggtitle("Area Plot of World GDP/Trillion by WB Region 1990 - 2014") +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal(base_size = 10)

# Render the plot.
plot2

# Save the per-region, per-year GDP totals. row.names = FALSE stops
# write.csv() from adding R's row index as an extra unnamed first column.
write.csv(regions_summarized, file = "regions_summarized.csv", row.names = FALSE)