# Point the session at the folder that holds the data file read below.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs as-is on a machine that has this exact directory; consider a
# project-relative path instead.
setwd("C:/Users/small/Desktop/MCollege/2021/DATA110/Datasets")
I loaded the required libraries and read in the data file.
library(readr)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.4
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Read nations.csv from the current working directory into a tibble; the
# column types are guessed by readr.
nations <- read_csv("nations.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## iso2c = col_character(),
## iso3c = col_character(),
## country = col_character(),
## year = col_double(),
## gdp_percap = col_double(),
## population = col_double(),
## birth_rate = col_double(),
## neonat_mortal_rate = col_double(),
## region = col_character(),
## income = col_character()
## )
# Print the tibble to preview its columns and first rows.
nations
## # A tibble: 5,275 x 10
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_r~
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## 7 AD AND Andorra 2004 NA 78337 10.9 2
## 8 AD AND Andorra 2010 NA 84419 9.8 1.7
## 9 AD AND Andorra 2001 NA 67770 11.8 2.1
## 10 AD AND Andorra 2002 NA 71046 11.2 2.1
## # ... with 5,265 more rows, and 2 more variables: region <chr>, income <chr>
I created a new variable ‘gdp_country’ using mutate from dplyr, giving the GDP of each country in trillions of dollars, by multiplying gdp_percap by population and dividing by a trillion. Then I filtered the data with dplyr for the four desired countries.
# Add each country's total GDP in trillions of dollars (per-capita GDP times
# population, scaled down by 1e12), then keep only the four economies that
# are compared in the first chart.
nations1 <- nations %>%
  mutate(gdp_country = gdp_percap * population / 1e12) %>%
  filter(country %in% c('China', 'Germany', 'Japan', 'United States'))
# Print the result to check the filtered rows and the new column.
nations1
## # A tibble: 100 x 11
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_r~
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CN CHN China 1992 1260. 1164970000 18.3 29.4
## 2 CN CHN China 2005 5053. 1303720000 12.4 14
## 3 CN CHN China 2000 2915. 1262645000 14.0 21.2
## 4 CN CHN China 1991 1091. 1150780000 19.7 29.7
## 5 CN CHN China 2013 12219. 1357380000 12.1 6.3
## 6 CN CHN China 1999 2650. 1252735000 14.6 22.2
## 7 CN CHN China 2014 13255. 1364270000 12.4 5.9
## 8 CN CHN China 2003 3934. 1288400000 12.4 17.1
## 9 CN CHN China 2004 4423. 1296075000 12.3 15.5
## 10 CN CHN China 1993 1453. 1178440000 18.1 28.8
## # ... with 90 more rows, and 3 more variables: region <chr>, income <chr>,
## # gdp_country <dbl>
For chart #1, built with ggplot2, I added both geom_point and geom_line layers and used the Set1 ColorBrewer palette.
# Chart 1: GDP over time for the selected countries, one colored series each.
# The static ggplot (lines drawn first, points on top) is handed straight to
# ggplotly() so that plot1 ends up holding the interactive plotly version.
plot1 <- ggplotly(
  ggplot(nations1, aes(x = year, y = gdp_country, colour = country)) +
    geom_line() +
    geom_point() +
    labs(
      title = "China's Rise to Become the Largest Economy",
      x = "Year",
      y = "GDP ($ trillion)"
    ) +
    theme_minimal(base_size = 12) +
    scale_color_brewer(palette = "Set1")
)
plot1
For chart #2, I used dplyr to group the data by region and year, and then summarized the newly created GDP variable by summing it within each group. (There are missing values, or NAs, in this data, so I needed to use na.rm = TRUE.)
# Total GDP (in trillions of dollars) for every region and year. Country-level
# GDP is per-capita GDP times population; na.rm = TRUE skips rows with missing
# values instead of turning the whole regional total into NA.
# summarise() drops only the last grouping level, so the result stays grouped
# by region.
nations2 <- nations %>%
  mutate(gdp_trillions = gdp_percap * population / 1e12) %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp_trillions, na.rm = TRUE))
## `summarise()` has grouped output by 'region'. You can override using the `.groups` argument.
# Print the summarised tibble to check the per-region, per-year totals.
nations2
## # A tibble: 175 x 3
## # Groups: region [7]
## region year GDP
## <chr> <dbl> <dbl>
## 1 East Asia & Pacific 1990 5.52
## 2 East Asia & Pacific 1991 6.03
## 3 East Asia & Pacific 1992 6.50
## 4 East Asia & Pacific 1993 7.04
## 5 East Asia & Pacific 1994 7.64
## 6 East Asia & Pacific 1995 8.29
## 7 East Asia & Pacific 1996 8.96
## 8 East Asia & Pacific 1997 9.55
## 9 East Asia & Pacific 1998 9.60
## 10 East Asia & Pacific 1999 10.1
## # ... with 165 more rows
For plot #2, built with ggplot2, I used geom_area to draw each region’s area. To fill the areas I applied a ColorBrewer palette using scale_fill_brewer(palette = “Set2”). To draw a white line between the areas, I set the ‘color’ argument of geom_area to white.
# Chart 2: stacked area chart of total GDP per World Bank region over time.
# The white outline on geom_area() separates the stacked regions, and all
# labels (title, axes, legend) are set in a single labs() call.
# Fix: the title previously read "Wolrd" instead of "World".
plot2 <- ggplot(data = nations2, aes(x = year, y = GDP, fill = region)) +
  geom_area(color = "white") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP by World Bank Region",
    x = "Year",
    y = "GDP ($ trillion)",
    fill = "Regions"
  ) +
  theme_minimal(base_size = 12)
plot2