library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(dplyr)
library(RColorBrewer)
library(tidyr)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# NOTE(review): this working directory is an absolute, machine-specific path;
# adjust it (or switch to a project-relative path) before running elsewhere.
setwd("C:/Users/gru_e/OneDrive/Desktop/DATA110")
# Import the nations data set from the working directory into a tibble.
nations <- read_csv(file = "nations.csv")
## Rows: 5275 Columns: 10
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the imported data.
nations
## # A tibble: 5,275 x 10
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## 7 AD AND Andorra 2004 NA 78337 10.9 2
## 8 AD AND Andorra 2010 NA 84419 9.8 1.7
## 9 AD AND Andorra 2001 NA 67770 11.8 2.1
## 10 AD AND Andorra 2002 NA 71046 11.2 2.1
## # ... with 5,265 more rows, and 2 more variables: region <chr>, income <chr>
# Add a `gdp` column: per-capita GDP multiplied by population, divided by 10^12
# so the totals are expressed in trillions.
nations2 <- mutate(nations, gdp = (gdp_percap * population) / 10^12)
# Preview the result; `gdp` is the newly added column.
nations2
## # A tibble: 5,275 x 11
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## 7 AD AND Andorra 2004 NA 78337 10.9 2
## 8 AD AND Andorra 2010 NA 84419 9.8 1.7
## 9 AD AND Andorra 2001 NA 67770 11.8 2.1
## 10 AD AND Andorra 2002 NA 71046 11.2 2.1
## # ... with 5,265 more rows, and 3 more variables: region <chr>, income <chr>,
## # gdp <dbl>
I picked four Southeast Asian countries: Thailand, Vietnam, Singapore, and the Philippines.
# Keep only the rows for the four countries being compared.
# `%in%` replaces the chained `==` / `|` comparisons: it selects the same rows,
# never yields NA, and makes the country list easy to extend.
nations4 <- nations2 %>%
  filter(country %in% c("Thailand", "Vietnam", "Singapore", "Philippines"))
# Preview the filtered subset.
nations4
## # A tibble: 100 x 11
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_r~
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 PH PHL Philippines 2000 3351. 77932247 29.6 16.8
## 2 PH PHL Philippines 1999 3205. 76285225 29.9 16.9
## 3 PH PHL Philippines 2001 3452. 79604541 29.3 16.7
## 4 PH PHL Philippines 2014 6994. 99138690 23.6 12.9
## 5 PH PHL Philippines 2013 6588. 97571676 23.8 13.3
## 6 PH PHL Philippines 2003 3732. 82971734 28.4 16.4
## 7 PH PHL Philippines 1998 3130. 74656228 30.2 17
## 8 PH PHL Philippines 2002 3557. 81294378 28.9 16.6
## 9 PH PHL Philippines 2012 6153. 96017322 24.0 13.7
## 10 PH PHL Philippines 2008 5126. 90297115 25.5 15
## # ... with 90 more rows, and 3 more variables: region <chr>, income <chr>,
## # gdp <dbl>
# Strongly prefer fixed notation over scientific notation when printing numbers.
options(scipen = 999)

# Base chart: one line per country with a point at every observation; both
# layers are coloured by country.
p1 <- ggplot(
  data = nations4,
  mapping = aes(x = year, y = gdp, group = country)
) +
  geom_line(mapping = aes(colour = country)) +
  geom_point(mapping = aes(colour = country))

# Apply the ColorBrewer "Set1" palette, then set the axis labels, the title
# and the theme.
p1 +
  scale_color_brewer(palette = "Set1") +
  scale_x_continuous(name = "year") +
  labs(
    y = "GDP ($trillion)",
    title = "GDP Rises in southeast Asian Countries"
  ) +
  theme_minimal()
# Total GDP for every region/year combination; missing `gdp` values are
# ignored when summing.
# `.groups = "drop_last"` spells out summarise()'s default for this call (the
# result stays grouped by `region`), which also silences the informational
# message about grouped output.
nations5 <- nations2 %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp, na.rm = TRUE), .groups = "drop_last")
# Area chart of summed GDP over time, filled by region, with thin white
# outlines separating the areas.
p2 <- ggplot(
  data = nations5,
  mapping = aes(x = year, y = GDP, fill = region)
) +
  geom_area(colour = "white", size = 0.1, alpha = 0.9)

# Apply the ColorBrewer "Set2" fill palette, then set the y-axis label, the
# title and the theme.
p2 +
  scale_fill_brewer(palette = "Set2") +
  labs(y = "GDP ($trillion)", title = "GDP by World Bank Region") +
  theme_minimal()