library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.4
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2)
# Folder holding the course data. The file is addressed through an explicit
# path instead of setwd(), so loading it does not change the session's
# working directory as a side effect.
data_dir <- "C:/Users/Haley/Desktop/Data Visualization"
# Read the nations data set: one row per country and year, with GDP per
# capita, population, birth rate, neonatal mortality rate, region and
# income group.
nations <- read_csv(file.path(data_dir, "nations.csv"))
##
## -- Column specification --------------------------------------------------------
## cols(
## iso2c = col_character(),
## iso3c = col_character(),
## country = col_character(),
## year = col_double(),
## gdp_percap = col_double(),
## population = col_double(),
## birth_rate = col_double(),
## neonat_mortal_rate = col_double(),
## region = col_character(),
## income = col_character()
## )
# Print per-column summary statistics (including NA counts for the numeric
# columns) as a quick sanity check of the imported data
summary(nations)
## iso2c iso3c country year
## Length:5275 Length:5275 Length:5275 Min. :1990
## Class :character Class :character Class :character 1st Qu.:1996
## Mode :character Mode :character Mode :character Median :2002
## Mean :2002
## 3rd Qu.:2008
## Max. :2014
##
## gdp_percap population birth_rate neonat_mortal_rate
## Min. : 239.7 Min. :9.004e+03 Min. : 6.90 Min. : 0.70
## 1st Qu.: 2263.6 1st Qu.:7.175e+05 1st Qu.:13.40 1st Qu.: 6.70
## Median : 6563.2 Median :5.303e+06 Median :21.60 Median :15.00
## Mean : 12788.8 Mean :2.958e+07 Mean :24.16 Mean :19.40
## 3rd Qu.: 17195.0 3rd Qu.:1.757e+07 3rd Qu.:33.88 3rd Qu.:29.48
## Max. :141968.1 Max. :1.364e+09 Max. :55.12 Max. :73.10
## NA's :766 NA's :14 NA's :295 NA's :525
## region income
## Length:5275 Length:5275
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
First, create a new variable in the data using mutate() from dplyr: the GDP of each country in trillions of dollars, obtained by multiplying gdp_percap by population and dividing by one trillion (10^12).
# Keep only the four economies being compared, then add a gdp column
# (total GDP in trillions of dollars); mutate() keeps all existing columns.
nations_mutate <- nations %>%
  filter(country %in% c("China", "Japan", "United States", "Germany")) %>%
  mutate(gdp = gdp_percap * population / 10^12)
Draw both charts
# Line-and-point chart of total GDP (trillions of dollars) against year,
# with one coloured series per country in nations_mutate.
nationsdotplot <- ggplot(
  nations_mutate,
  aes(x = year, y = gdp, group = country)
) +
  geom_line(aes(color = country)) +
  geom_point(aes(color = country)) +
  labs(title = "China's Rise to Become the Largest Economy") +
  xlab("Year") +
  # Label corrected from the misspelled "GPD": the plotted quantity is GDP
  ylab("GDP ($Trillion)") +
  scale_color_brewer(palette = "Set1") +
  theme_minimal()
# Display the chart
nationsdotplot
Next, group the data by region and year, then total the mutated gdp value within each group using summarise(GDP = sum(gdp, na.rm = TRUE)). (The data contain missing values, or NAs, so na.rm = TRUE is needed; without it, any group containing an NA would sum to NA.)
# Total GDP (trillions of dollars) for each region in each year.
nations2 <- nations %>%
  mutate(gdp = gdp_percap * population / 10^12) %>%
  group_by(region, year) %>%
  # na.rm = TRUE: gdp is NA wherever gdp_percap or population is missing.
  # .groups = "drop" returns an ungrouped tibble, so later verbs are not
  # silently applied per region, and summarise() no longer prints its
  # "grouped output" message.
  summarise(GDP = sum(gdp, na.rm = TRUE), .groups = "drop")
# Stacked area chart of total GDP (trillions of dollars) by year, with one
# filled band per region from nations2.
chart2 <- ggplot(nations2, aes(x = year, y = GDP, fill = region)) +
  # color = "white" draws a white outline around each region's area
  geom_area(color = "white") +
  # Fill the regions with the ColorBrewer "Set2" palette
  scale_fill_brewer(palette = "Set2") +
  # Minimal theme (this is not a dark theme, despite the earlier comment)
  theme_minimal() +
  # Title and axis labels; "World Band" and "GPD" were typos for
  # "World Bank" and "GDP"
  labs(title = "GDP by World Bank Region") +
  xlab("Year") +
  ylab("GDP ($Trillion)")
# Display the chart
chart2