Chart 1 China’s Rise to Become the Largest Economy
Load the dataset
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.2.0 ✔ readr 2.1.6
✔ forcats 1.0.1 ✔ stringr 1.6.0
✔ ggplot2 4.0.2 ✔ tibble 3.3.1
✔ lubridate 1.9.5 ✔ tidyr 1.3.2
✔ purrr 1.2.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
p1 <-read_csv("nations.csv")
Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
view(p1)#filter all nas in gdp_percapnations_nona <-p1 |>filter(!is.na(gdp_percap))#create a new variable (GDP_intrillion) :GDP of each country in trillions of dollarsGDP_intrillion <- nations_nona |>mutate(GDP_intrillion=gdp_percap*population/10^12) GDP_intrillion
# A tibble: 4,509 × 11
iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AE ARE United… 1991 73037. 1913190 24.6 7.9
2 AE ARE United… 1993 71960. 2127863 22.4 7.3
3 AE ARE United… 2001 83534. 3217865 15.8 5.5
4 AE ARE United… 1992 73154. 2019014 23.5 7.6
5 AE ARE United… 1994 74684. 2238281 21.3 6.9
6 AE ARE United… 2007 75427. 6010100 12.8 4.7
7 AE ARE United… 2004 87844. 3975945 14.2 5.1
8 AE ARE United… 1996 79480. 2467726 19.3 6.4
9 AE ARE United… 2006 82754. 5171255 13.3 4.9
10 AE ARE United… 2000 84975. 3050128 16.4 5.6
# ℹ 4,499 more rows
# ℹ 3 more variables: region <chr>, income <chr>, GDP_intrillion <dbl>
#find top 4 GDP countriesGDP_top4 <- GDP_intrillion |>group_by(country) |>summarise(max_gdp=max(GDP_intrillion),na.rm =TRUE) |>arrange(desc(max_gdp)) |>head(4)GDP_top4
# A tibble: 4 × 3
country max_gdp na.rm
<chr> <dbl> <lgl>
1 China 18.1 TRUE
2 United States 17.3 TRUE
3 India 7.35 TRUE
4 Japan 4.66 TRUE
#filter the data with dplyr for the four desired countriescoutry_top4 <- GDP_intrillion |>filter(country %in% GDP_top4$country)coutry_top4
# A tibble: 100 × 11
iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 CN CHN China 1992 1260. 1164970000 18.3 29.4
2 CN CHN China 2005 5053. 1303720000 12.4 14
3 CN CHN China 2000 2915. 1262645000 14.0 21.2
4 CN CHN China 1991 1091. 1150780000 19.7 29.7
5 CN CHN China 2013 12219. 1357380000 12.1 6.3
6 CN CHN China 1999 2650. 1252735000 14.6 22.2
7 CN CHN China 2014 13255. 1364270000 12.4 5.9
8 CN CHN China 2003 3934. 1288400000 12.4 17.1
9 CN CHN China 2004 4423. 1296075000 12.3 15.5
10 CN CHN China 1993 1453. 1178440000 18.1 28.8
# ℹ 90 more rows
# ℹ 3 more variables: region <chr>, income <chr>, GDP_intrillion <dbl>
Draw chart 1 with ggplot2.
p1 <-ggplot(coutry_top4,aes(x=year,y=GDP_intrillion,color=country)) +geom_line(size=0.4)+geom_point()+xlim(1990,2015)+ylim(0,20)+scale_color_brewer(palette="Set1")+labs(title="China's Rise to Become the Largest Economy",caption="Source:nations.csv",x="Year",y="GDP($trillion)")+theme_minimal(base_size=12)
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
p1
Chart 2 GDP by World Bank Region
Load the dataset * Each region’s area will be generated by the command geom_area ()
#create a new variable (RegionalGDP_intrillion) :regional GDP in trillions of dollars#group_by region and yearRegionalGDP_intrillion <- GDP_intrillion |>group_by(region,year) |>summarise(GDP=sum(GDP_intrillion,na.rm =TRUE))
`summarise()` has regrouped the output.
ℹ Summaries were computed grouped by region and year.
ℹ Output is grouped by region.
ℹ Use `summarise(.groups = "drop_last")` to silence this message.
ℹ Use `summarise(.by = c(region, year))` for per-operation grouping
(`?dplyr::dplyr_by`) instead.
RegionalGDP_intrillion
# A tibble: 175 × 3
# Groups: region [7]
region year GDP
<chr> <dbl> <dbl>
1 East Asia & Pacific 1990 5.52
2 East Asia & Pacific 1991 6.03
3 East Asia & Pacific 1992 6.50
4 East Asia & Pacific 1993 7.04
5 East Asia & Pacific 1994 7.64
6 East Asia & Pacific 1995 8.29
7 East Asia & Pacific 1996 8.96
8 East Asia & Pacific 1997 9.55
9 East Asia & Pacific 1998 9.60
10 East Asia & Pacific 1999 10.1
# ℹ 165 more rows
names(RegionalGDP_intrillion)
[1] "region" "year" "GDP"
head(RegionalGDP_intrillion)
# A tibble: 6 × 3
# Groups: region [1]
region year GDP
<chr> <dbl> <dbl>
1 East Asia & Pacific 1990 5.52
2 East Asia & Pacific 1991 6.03
3 East Asia & Pacific 1992 6.50
4 East Asia & Pacific 1993 7.04
5 East Asia & Pacific 1994 7.64
6 East Asia & Pacific 1995 8.29
Draw chart 2 with ggplot2.
p2 <-ggplot(RegionalGDP_intrillion,aes(x=year,y=GDP,fill=region)) +geom_area(alpha=0.9,size =0.2,color="white")+#xlim(1990,2015)+#ylim(0,100)+#color:Set1,Set2,Paired,Dark2 scale_fill_brewer(palette ="Paired") +labs(title="GDP by World Bank Region",caption="Source:nations.csv",x="Year",y="GDP($trillion)")+theme_minimal(base_size=12)p2