Nations Charts Assignments

Chart 1 China’s Rise to Become the Largest Economy

Load the dataset

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.2     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Read the nations dataset from the working directory into a tibble
p1 <- read_csv(file = "nations.csv")

Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Open the raw table in the data viewer for a quick look
view(p1)

# Keep only the rows where per-capita GDP is present
nations_nona <- filter(p1, !is.na(gdp_percap))

# Add a GDP_intrillion column: each country's total GDP in trillions of
# dollars, computed as per-capita GDP times population, divided by 10^12
GDP_intrillion <- mutate(
  nations_nona,
  GDP_intrillion = gdp_percap * population / 10^12
)

GDP_intrillion

# A tibble: 4,509 × 11
   iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
   <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
 1 AE    ARE   United…  1991     73037.    1913190       24.6                7.9
 2 AE    ARE   United…  1993     71960.    2127863       22.4                7.3
 3 AE    ARE   United…  2001     83534.    3217865       15.8                5.5
 4 AE    ARE   United…  1992     73154.    2019014       23.5                7.6
 5 AE    ARE   United…  1994     74684.    2238281       21.3                6.9
 6 AE    ARE   United…  2007     75427.    6010100       12.8                4.7
 7 AE    ARE   United…  2004     87844.    3975945       14.2                5.1
 8 AE    ARE   United…  1996     79480.    2467726       19.3                6.4
 9 AE    ARE   United…  2006     82754.    5171255       13.3                4.9
10 AE    ARE   United…  2000     84975.    3050128       16.4                5.6
# ℹ 4,499 more rows
# ℹ 3 more variables: region <chr>, income <chr>, GDP_intrillion <dbl>

# Find the four countries with the largest peak GDP.
# For each country take its highest yearly GDP (in trillions of dollars), rank
# the countries from largest to smallest, and keep the first four rows.
GDP_top4 <- GDP_intrillion |>
  group_by(country) |>
  # na.rm must be passed to max(); given to summarise() it would instead
  # create a constant column named `na.rm` and leave max() unaffected
  summarise(max_gdp = max(GDP_intrillion, na.rm = TRUE)) |>
  arrange(desc(max_gdp)) |>
  head(4)

GDP_top4

# A tibble: 4 × 3
  country       max_gdp na.rm
  <chr>           <dbl> <lgl>
1 China           18.1  TRUE 
2 United States   17.3  TRUE 
3 India            7.35 TRUE 
4 Japan            4.66 TRUE

# Keep every yearly record that belongs to one of the four top-GDP countries
coutry_top4 <- filter(GDP_intrillion, country %in% GDP_top4[["country"]])

coutry_top4

# A tibble: 100 × 11
   iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
   <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
 1 CN    CHN   China    1992      1260. 1164970000       18.3               29.4
 2 CN    CHN   China    2005      5053. 1303720000       12.4               14  
 3 CN    CHN   China    2000      2915. 1262645000       14.0               21.2
 4 CN    CHN   China    1991      1091. 1150780000       19.7               29.7
 5 CN    CHN   China    2013     12219. 1357380000       12.1                6.3
 6 CN    CHN   China    1999      2650. 1252735000       14.6               22.2
 7 CN    CHN   China    2014     13255. 1364270000       12.4                5.9
 8 CN    CHN   China    2003      3934. 1288400000       12.4               17.1
 9 CN    CHN   China    2004      4423. 1296075000       12.3               15.5
10 CN    CHN   China    1993      1453. 1178440000       18.1               28.8
# ℹ 90 more rows
# ℹ 3 more variables: region <chr>, income <chr>, GDP_intrillion <dbl>

Draw chart 1 with ggplot2.

# Chart 1: GDP over time for the four top-GDP countries, drawn as one coloured
# line per country with a point at every yearly observation.
# NOTE: this reuses the name `p1`, so the raw data read from nations.csv is
# replaced by the plot object from here on.
p1 <- ggplot(coutry_top4, aes(x = year, y = GDP_intrillion, color = country)) +
  # `linewidth` replaces the `size` argument, which is deprecated for lines
  # since ggplot2 3.4.0
  geom_line(linewidth = 0.4) +
  geom_point() +
  # Fix the axis ranges
  xlim(1990, 2015) +
  ylim(0, 20) +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "China's Rise to Become the Largest Economy",
    caption = "Source:nations.csv",
    x = "Year",
    y = "GDP($trillion)"
  ) +
  theme_minimal(base_size = 12)

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

# Display chart 1
p1

Chart 2 GDP by World Bank Region

Load the dataset. Each region’s area will be generated by the `geom_area()` function.

# Regional GDP in trillions of dollars: sum the country-level GDP within every
# region and year.
RegionalGDP_intrillion <- GDP_intrillion |>
  group_by(region, year) |>
  # .groups = "drop" returns an ungrouped tibble and silences the regrouping
  # message, so later steps do not inherit a leftover grouping by region
  summarise(GDP = sum(GDP_intrillion, na.rm = TRUE), .groups = "drop")

`summarise()` has regrouped the output.
ℹ Summaries were computed grouped by region and year.
ℹ Output is grouped by region.
ℹ Use `summarise(.groups = "drop_last")` to silence this message.
ℹ Use `summarise(.by = c(region, year))` for per-operation grouping
  (`?dplyr::dplyr_by`) instead.

# Print the regional GDP table to check the summary
RegionalGDP_intrillion

# A tibble: 175 × 3
# Groups:   region [7]
   region               year   GDP
   <chr>               <dbl> <dbl>
 1 East Asia & Pacific  1990  5.52
 2 East Asia & Pacific  1991  6.03
 3 East Asia & Pacific  1992  6.50
 4 East Asia & Pacific  1993  7.04
 5 East Asia & Pacific  1994  7.64
 6 East Asia & Pacific  1995  8.29
 7 East Asia & Pacific  1996  8.96
 8 East Asia & Pacific  1997  9.55
 9 East Asia & Pacific  1998  9.60
10 East Asia & Pacific  1999 10.1 
# ℹ 165 more rows

# List the column names of the regional summary
colnames(RegionalGDP_intrillion)

[1] "region" "year"   "GDP"

# Preview the first six rows of the regional summary
head(RegionalGDP_intrillion, n = 6)

# A tibble: 6 × 3
# Groups:   region [1]
  region               year   GDP
  <chr>               <dbl> <dbl>
1 East Asia & Pacific  1990  5.52
2 East Asia & Pacific  1991  6.03
3 East Asia & Pacific  1992  6.50
4 East Asia & Pacific  1993  7.04
5 East Asia & Pacific  1994  7.64
6 East Asia & Pacific  1995  8.29

Draw chart 2 with ggplot2.

# Chart 2: stacked area chart of total GDP per World Bank region over time.
p2 <- ggplot(RegionalGDP_intrillion, aes(x = year, y = GDP, fill = region)) +
  # Thin white outlines separate the stacked regions; `linewidth` replaces the
  # `size` argument, which is deprecated for lines since ggplot2 3.4.0
  geom_area(alpha = 0.9, linewidth = 0.2, color = "white") +
  # Candidate Brewer palettes: Set1, Set2, Paired, Dark2
  scale_fill_brewer(palette = "Paired") +
  labs(
    title = "GDP by World Bank Region",
    caption = "Source:nations.csv",
    x = "Year",
    y = "GDP($trillion)"
  ) +
  theme_minimal(base_size = 12)
p2

Citation

Data 110 week 6 and week 5 material (chart 1)

https://r-graph-gallery.com/136-stacked-area-chart.html (chart 2)