library(fansi)
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.2 v dplyr 1.0.6
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the country-level data and derive total GDP in trillions:
# GDP per capita times population, divided by 1e12.
nations <- read_csv("nations.csv") %>%
  mutate(gdp_tn = gdp_percap * population / 1e12)
##
## -- Column specification --------------------------------------------------------
## cols(
## iso2c = col_character(),
## iso3c = col_character(),
## country = col_character(),
## year = col_double(),
## gdp_percap = col_double(),
## population = col_double(),
## birth_rate = col_double(),
## neonat_mortal_rate = col_double(),
## region = col_character(),
## income = col_character()
## )
# Inspect column names and types; gdp_tn should be listed as a new column.
str(nations)
## spec_tbl_df [5,275 x 11] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ iso2c : chr [1:5275] "AD" "AD" "AD" "AD" ...
## $ iso3c : chr [1:5275] "AND" "AND" "AND" "AND" ...
## $ country : chr [1:5275] "Andorra" "Andorra" "Andorra" "Andorra" ...
## $ year : num [1:5275] 1996 1994 2003 1990 2009 ...
## $ gdp_percap : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
## $ population : num [1:5275] 64291 62707 74783 54511 85474 ...
## $ birth_rate : num [1:5275] 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
## $ neonat_mortal_rate: num [1:5275] 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
## $ region : chr [1:5275] "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
## $ income : chr [1:5275] "High income" "High income" "High income" "High income" ...
## $ gdp_tn : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
## - attr(*, "spec")=
## .. cols(
## .. iso2c = col_character(),
## .. iso3c = col_character(),
## .. country = col_character(),
## .. year = col_double(),
## .. gdp_percap = col_double(),
## .. population = col_double(),
## .. birth_rate = col_double(),
## .. neonat_mortal_rate = col_double(),
## .. region = col_character(),
## .. income = col_character()
## .. )
# Open nations in the data viewer for a visual check of the new column.
view(nations)
Yes, `gdp_tn` exists: the `str()` output lists it as the 11th column.
# Prepare data: keep the four economies being compared (matched on ISO3 code)
# and sort the rows by year.
big4 <- nations %>%
  filter(iso3c %in% c("CHN", "DEU", "JPN", "USA")) %>%
  arrange(year)
# Look at the result to confirm only those four countries remain.
view(big4)
The filter has worked. Only the four countries appear in the country column.
# Reusable chart template: year on x, GDP (trillions) on y, with title, axis
# labels and theme set once. Geoms are added on top of it afterwards.
big4_chart <- ggplot(data = big4, mapping = aes(x = year, y = gdp_tn)) +
  labs(
    title = "China's Rise as the Biggest Economy",
    x = "Year",
    y = "GDP (trillions)"
  ) +
  theme_minimal(base_size = 14)
# Print the template on its own (no geoms yet).
big4_chart
# Scatter version: one point per country and year, coloured by country
# using the ColorBrewer "Set1" palette.
big4_chart +
  geom_point(mapping = aes(colour = country), size = 3) +
  scale_colour_brewer(palette = "Set1")
If I try to add `geom_line()`, I get a weird figure:
# Point-and-line chart of GDP over time, one series per country.
# Notes on the two things this chart depends on:
#   * geom_line() needs a per-country grouping (supplied here through the
#     colour aesthetic); without one, all observations are joined into a
#     single zig-zag path across the four countries.
#   * every layer and scale must be chained with `+`. If the `+` after
#     geom_line() is left out, the plot ends there and scale_color_brewer()
#     runs as a separate statement that only prints the scale object.
big4_chart +
  geom_point(aes(color = country), size = 3) +
  geom_line(aes(color = country)) +
  scale_color_brewer(palette = "Set1")
## <ggproto object: Class ScaleDiscrete, Scale, gg>
## aesthetics: colour
## axis_order: function
## break_info: function
## break_positions: function
## breaks: waiver
## call: call
## clone: function
## dimension: function
## drop: TRUE
## expand: waiver
## get_breaks: function
## get_breaks_minor: function
## get_labels: function
## get_limits: function
## guide: legend
## is_discrete: function
## is_empty: function
## labels: waiver
## limits: NULL
## make_sec_title: function
## make_title: function
## map: function
## map_df: function
## n.breaks.cache: NULL
## na.translate: TRUE
## na.value: NA
## name: waiver
## palette: function
## palette.cache: NULL
## position: left
## range: <ggproto object: Class RangeDiscrete, Range, gg>
## range: NULL
## reset: function
## train: function
## super: <ggproto object: Class RangeDiscrete, Range, gg>
## rescale: function
## reset: function
## scale_name: brewer
## train: function
## train_df: function
## transform: function
## transform_df: function
## super: <ggproto object: Class ScaleDiscrete, Scale, gg>
If I do it with summarise, I get this error message:
Error in summarise(GDP = sum(gdp, na.rm = TRUE)) : object ‘gdp’ not found
(`nations` has no column called `gdp`; the GDP column created above is `gdp_tn`.)
If I do it without summarising, I get this:
# Add `group`: region converted to an ordered factor with an explicit level
# order. The result is printed, not assigned, so `nations` is left unchanged.
nations %>%
  mutate(
    group = factor(
      region,
      levels = c(
        "East Asia & Pacific",
        "Europe & Central Asia",
        "Latin America & Caribbean",
        "Middle East & North Africa",
        "North America",
        "South Asia",
        "Sub-Saharan Africa"
      ),
      ordered = TRUE
    )
  )
## # A tibble: 5,275 x 12
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## 7 AD AND Andorra 2004 NA 78337 10.9 2
## 8 AD AND Andorra 2010 NA 84419 9.8 1.7
## 9 AD AND Andorra 2001 NA 67770 11.8 2.1
## 10 AD AND Andorra 2002 NA 71046 11.2 2.1
## # ... with 5,265 more rows, and 4 more variables: region <chr>, income <chr>,
## # gdp_tn <dbl>, group <ord>
If I try to do the geom_area, I get a weird figure:
# Stacked area chart of total GDP (trillions) per region over time.
# The data has one row per country and year, so it is first summed to one row
# per region and year; stacking the raw country rows is what produces a jagged
# figure. na.rm = TRUE skips countries with missing GDP instead of dropping
# the whole region-year, and .groups = "drop" returns an ungrouped tibble.
nations %>%
  group_by(region, year) %>%
  summarise(gdp_tn = sum(gdp_tn, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = year, y = gdp_tn)) +
  labs(title = "GDP by World Bank Regions") +
  xlab("Year") +
  ylab("GDP (trillions)") +
  # color is a fixed outline colour, so it is set outside aes(); inside aes()
  # the string "white" would be mapped as a data value and get its own legend.
  geom_area(aes(fill = region), color = "white") +
  scale_fill_brewer(palette = "Set2")
## Warning: Removed 766 rows containing missing values (position_stack).