library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.4
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.5 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2)
library(RColorBrewer)
# Set the working directory so the relative file name below resolves against it.
# NOTE(review): this absolute path is specific to one machine; consider an
# RStudio project or a path built with file.path() instead of setwd().
setwd("C:/Users/Dano/Documents/")
# Read the nations data set into a tibble.
nations <- readr::read_csv(file = "nations.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## iso2c = col_character(),
## iso3c = col_character(),
## country = col_character(),
## year = col_double(),
## gdp_percap = col_double(),
## population = col_double(),
## birth_rate = col_double(),
## neonat_mortal_rate = col_double(),
## region = col_character(),
## income = col_character()
## )
# The call above reads nations.csv into the `nations` tibble.
# Derive total GDP (per-capita GDP times population, divided by 10^12 so the
# values are in trillions) and keep only the columns used by the charts.
# GDP is NA wherever gdp_percap or population is missing.
nationsgdp <- nations %>%
  mutate(GDP = gdp_percap * population / 10^12) %>%
  select(gdp_percap, population, country, year, region, GDP)
# Print a preview of the result.
nationsgdp
## # A tibble: 5,275 x 6
## gdp_percap population country year region GDP
## <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 NA 64291 Andorra 1996 Europe & Central Asia NA
## 2 NA 62707 Andorra 1994 Europe & Central Asia NA
## 3 NA 74783 Andorra 2003 Europe & Central Asia NA
## 4 NA 54511 Andorra 1990 Europe & Central Asia NA
## 5 NA 85474 Andorra 2009 Europe & Central Asia NA
## 6 NA 82326 Andorra 2011 Europe & Central Asia NA
## 7 NA 78337 Andorra 2004 Europe & Central Asia NA
## 8 NA 84419 Andorra 2010 Europe & Central Asia NA
## 9 NA 67770 Andorra 2001 Europe & Central Asia NA
## 10 NA 71046 Andorra 2002 Europe & Central Asia NA
## # ... with 5,265 more rows
# Rows for the four countries compared in the first chart, reduced to the
# columns that chart needs.
top4 <- nationsgdp %>%
  filter(country %in% c("China", "Japan", "Germany", "United States")) %>%
  select(GDP, country, year, region)
# For the first chart, you will need to filter the data with dplyr for the
# four desired countries. When making the chart with ggplot2, you will need to
# add both geom_point() and geom_line() layers, and use the Set1 ColorBrewer
# palette via scale_color_brewer(palette = "Set1").
# First chart: GDP over time for the four selected countries, drawn as one
# coloured line per country with a point at each observation.
ggplot(top4, aes(x = year, y = GDP, color = country)) +
  geom_line() +
  geom_point() +
  # The palette has to be chained with `+`; written as a separate statement it
  # is only printed to the console and never applied to the plot.
  scale_colour_brewer(palette = "Set1") +
  ggtitle("China's Rise to Become the Largest Economy") +
  labs(x = "year", y = "GDP($ trillion)") +
  # theme_minimal() is a complete theme and replaces any earlier theme()
  # settings, so the legend-title tweak must come after it.
  theme_minimal() +
  theme(legend.title = element_blank())
# For the second chart, using dplyr you will need to group_by region and year,
# and then summarize on your mutated value for gdp using
# summarise(GDP = sum(gdp, na.rm = TRUE)). (There will be null values, or NAs,
# in this data, so you will need to use na.rm = TRUE.)
# - Each region's area will be generated by the command geom_area().
# - When drawing the chart with ggplot2, you will need to use the Set2
#   ColorBrewer palette using scale_fill_brewer(palette = "Set2").
# - Think about the difference between fill and color when making the chart,
#   and where the above fill command needs to go in order for the regions to
#   fill with the different colors when making the chart, and put a very thin
#   white line around each area.
# Total GDP for every region/year combination; rows with missing GDP are
# ignored by the sum. `.groups = "drop"` returns an ungrouped tibble, so later
# verbs are not silently applied per region and summarise() no longer emits
# its "grouped output" message.
region <- nationsgdp %>%
  group_by(region, year) %>%
  summarise(GDP = sum(GDP, na.rm = TRUE), .groups = "drop")
# Second chart: area chart of summed GDP per year, one filled area per region,
# each outlined in white and coloured from the Set2 ColorBrewer palette.
ggplot(region, aes(x = year, y = GDP, fill = region)) +
  geom_area(colour = "white") +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP by World Bank Region",
    x = "year",
    y = "GDP($ trillion)"
  )