library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.1 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(dplyr)
library(knitr)
# Load the raw country-level data from the CSV in the working directory.
nations <- readr::read_csv(file = "nations.csv")
## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# nations.csv contains the following fields:
# iso2c, iso3c: Two- and three-letter codes for each country, assigned by the International Organization for Standardization.
# country: Country name.
# year: Year of the observation.
# population: Estimated total population at mid-year, including all residents apart from refugees.
# gdp_percap: Gross Domestic Product per capita in current international dollars, corrected for purchasing power in different territories.
# birth_rate: Live births during the year per 1,000 people, based on mid-year population estimate.
# neonat_mortal_rate: Neonatal mortality rate: babies dying before reaching 28 days of age, per 1,000 live births in a given year.
# region, income: World Bank regions and income groups.
# gdp_tn: not a column of nations.csv as read above (the file has 10 columns); total GDP in trillions of dollars is derived below as `gdp`.
# Preview the first six rows of the raw data.
nations %>%
  head(n = 6)
## # A tibble: 6 × 10
## iso2c iso3c country year gdp_percap population birth_…¹ neona…² region income
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8 Europ… High …
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2 Europ… High …
## 3 AD AND Andorra 2003 NA 74783 10.3 2 Europ… High …
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3 Europ… High …
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7 Europ… High …
## 6 AD AND Andorra 2011 NA 82326 NA 1.6 Europ… High …
## # … with abbreviated variable names ¹birth_rate, ²neonat_mortal_rate
# Reload the data and add `gdp`: each row's total GDP in trillions of dollars
# (GDP per capita times population, divided by 1e12).
nations <- "nations.csv" %>%
  read_csv() %>%
  mutate(gdp = (gdp_percap * population) / 1e12)
## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Column-wise overview: one line per column with its type and first values.
nations %>%
  glimpse()
## Rows: 5,275
## Columns: 11
## $ iso2c <chr> "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD…
## $ iso3c <chr> "AND", "AND", "AND", "AND", "AND", "AND", "AND", "A…
## $ country <chr> "Andorra", "Andorra", "Andorra", "Andorra", "Andorr…
## $ year <dbl> 1996, 1994, 2003, 1990, 2009, 2011, 2004, 2010, 200…
## $ gdp_percap <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ population <dbl> 64291, 62707, 74783, 54511, 85474, 82326, 78337, 84…
## $ birth_rate <dbl> 10.900, 10.900, 10.300, 11.900, 9.900, NA, 10.900, …
## $ neonat_mortal_rate <dbl> 2.8, 3.2, 2.0, 4.3, 1.7, 1.6, 2.0, 1.7, 2.1, 2.1, 2…
## $ region <chr> "Europe & Central Asia", "Europe & Central Asia", "…
## $ income <chr> "High income", "High income", "High income", "High …
## $ gdp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
# `nations` is already a tibble (read_csv() returns one), so print it directly
# instead of rebuilding it with the tibble() constructor just to display it.
nations
## # A tibble: 5,275 × 11
## iso2c iso3c country year gdp_p…¹ popul…² birth…³ neona…⁴ region income gdp
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8 Europ… High … NA
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2 Europ… High … NA
## 3 AD AND Andorra 2003 NA 74783 10.3 2 Europ… High … NA
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3 Europ… High … NA
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7 Europ… High … NA
## 6 AD AND Andorra 2011 NA 82326 NA 1.6 Europ… High … NA
## 7 AD AND Andorra 2004 NA 78337 10.9 2 Europ… High … NA
## 8 AD AND Andorra 2010 NA 84419 9.8 1.7 Europ… High … NA
## 9 AD AND Andorra 2001 NA 67770 11.8 2.1 Europ… High … NA
## 10 AD AND Andorra 2002 NA 71046 11.2 2.1 Europ… High … NA
## # … with 5,265 more rows, and abbreviated variable names ¹gdp_percap,
## # ²population, ³birth_rate, ⁴neonat_mortal_rate
# NOTE(review): filter() is called without any condition, so every row is
# kept and this simply prints the full table again -- confirm whether a
# predicate was intended here.
nations %>%
  filter()
## # A tibble: 5,275 × 11
## iso2c iso3c country year gdp_p…¹ popul…² birth…³ neona…⁴ region income gdp
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8 Europ… High … NA
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2 Europ… High … NA
## 3 AD AND Andorra 2003 NA 74783 10.3 2 Europ… High … NA
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3 Europ… High … NA
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7 Europ… High … NA
## 6 AD AND Andorra 2011 NA 82326 NA 1.6 Europ… High … NA
## 7 AD AND Andorra 2004 NA 78337 10.9 2 Europ… High … NA
## 8 AD AND Andorra 2010 NA 84419 9.8 1.7 Europ… High … NA
## 9 AD AND Andorra 2001 NA 67770 11.8 2.1 Europ… High … NA
## 10 AD AND Andorra 2002 NA 71046 11.2 2.1 Europ… High … NA
## # … with 5,265 more rows, and abbreviated variable names ¹gdp_percap,
## # ²population, ³birth_rate, ⁴neonat_mortal_rate
# Per-column summary: quartiles and NA counts for numeric columns,
# length/class/mode for character columns.
nations %>%
  summary()
## iso2c iso3c country year
## Length:5275 Length:5275 Length:5275 Min. :1990
## Class :character Class :character Class :character 1st Qu.:1996
## Mode :character Mode :character Mode :character Median :2002
## Mean :2002
## 3rd Qu.:2008
## Max. :2014
##
## gdp_percap population birth_rate neonat_mortal_rate
## Min. : 239.7 Min. :9.004e+03 Min. : 6.90 Min. : 0.70
## 1st Qu.: 2263.6 1st Qu.:7.175e+05 1st Qu.:13.40 1st Qu.: 6.70
## Median : 6563.2 Median :5.303e+06 Median :21.60 Median :15.00
## Mean : 12788.8 Mean :2.958e+07 Mean :24.16 Mean :19.40
## 3rd Qu.: 17195.0 3rd Qu.:1.757e+07 3rd Qu.:33.88 3rd Qu.:29.48
## Max. :141968.1 Max. :1.364e+09 Max. :55.12 Max. :73.10
## NA's :766 NA's :14 NA's :295 NA's :525
## region income gdp
## Length:5275 Length:5275 Min. : 0.0000
## Class :character Class :character 1st Qu.: 0.0077
## Mode :character Mode :character Median : 0.0324
## Mean : 0.3259
## 3rd Qu.: 0.1849
## Max. :18.0829
## NA's :766
# Keep only the four economies compared in the chart (China, Germany, Japan,
# United States), ordered by year. `%in%` replaces the chained `==` / `|` test.
plot1 <- nations %>%
  filter(iso3c %in% c("CHN", "DEU", "JPN", "USA")) %>%
  arrange(year)

# Line chart (with point markers) of each country's `gdp` over time,
# one colour per country.
ggplot(plot1, aes(x = year, y = gdp, color = country)) +
  geom_line() +
  geom_point() +
  # Title and axis labels
  ggtitle("China's Rise to Become the Largest Economy") +
  xlab("year") +
  ylab("GDP ($ trillion)") +
  # ColorBrewer palette for the country colours
  scale_color_brewer(palette = "Set1") +
  # All theme tweaks in a single call; the stray empty argument (`, ,`) that
  # the original passed to theme() is removed.
  theme(
    # Text sizes
    plot.title = element_text(size = 14),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 12),
    # No legend title, no box around legend keys
    legend.title = element_blank(),
    legend.key = element_rect(color = NA, fill = NA),
    # White panel background
    panel.background = element_rect(fill = "white", color = "white"),
    # Light gray grid lines; `linewidth` replaces the `size` argument of
    # element_line(), which is deprecated as of ggplot2 3.4.0.
    panel.grid.major = element_line(
      linewidth = 0.5, linetype = "solid", color = "#d8d8d8"
    ),
    panel.grid.minor = element_line(
      linewidth = 0.5, linetype = "solid", color = "#d8d8d8"
    )
  )
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
# Show the filtered data behind the line chart (plot1 is a tibble, not a plot).
print(plot1)
## # A tibble: 100 × 11
## iso2c iso3c country year gdp_p…¹ popul…² birth…³ neona…⁴ region income gdp
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 CN CHN China 1990 980. 1.14e9 21.1 29.7 East … Upper… 1.11
## 2 DE DEU Germany 1990 19033. 7.94e7 11.4 3.4 Europ… High … 1.51
## 3 JP JPN Japan 1990 19230. 1.24e8 10 2.5 East … High … 2.38
## 4 US USA United… 1990 23954. 2.50e8 16.7 5.8 North… High … 5.98
## 5 CN CHN China 1991 1091. 1.15e9 19.7 29.7 East … Upper… 1.26
## 6 DE DEU Germany 1991 20521. 8.00e7 10.4 3.5 Europ… High … 1.64
## 7 JP JPN Japan 1991 20467. 1.24e8 9.9 2.5 East … High … 2.54
## 8 US USA United… 1991 24405. 2.53e8 16.2 5.6 North… High … 6.17
## 9 CN CHN China 1992 1260. 1.16e9 18.3 29.4 East … Upper… 1.47
## 10 DE DEU Germany 1992 21230. 8.06e7 10 3.5 Europ… High … 1.71
## # … with 90 more rows, and abbreviated variable names ¹gdp_percap, ²population,
## # ³birth_rate, ⁴neonat_mortal_rate
# Reload the raw CSV, overwriting `nations`; the `gdp` column is derived again
# in the next step.
nations <- readr::read_csv(file = "nations.csv")
## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Total GDP (in trillions of dollars) per region and year.
# Rows with missing GDP are ignored in the sums (na.rm = TRUE).
plot2 <- nations %>%
  mutate(gdp = gdp_percap * population / 1e12) %>%
  group_by(year, region) %>%
  # `.groups = "drop"` returns an ungrouped tibble explicitly instead of
  # relying on the default (which leaves the result grouped by `year` and
  # emits an informational message).
  summarise(sum = sum(gdp, na.rm = TRUE), .groups = "drop") %>%
  arrange(year, region)
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# Show the regional GDP totals that feed the area chart.
print(plot2)
## # A tibble: 175 × 3
## # Groups: year [25]
## year region sum
## <dbl> <chr> <dbl>
## 1 1990 East Asia & Pacific 5.52
## 2 1990 Europe & Central Asia 9.36
## 3 1990 Latin America & Caribbean 2.40
## 4 1990 Middle East & North Africa 1.66
## 5 1990 North America 6.54
## 6 1990 South Asia 1.35
## 7 1990 Sub-Saharan Africa 0.787
## 8 1991 East Asia & Pacific 6.03
## 9 1991 Europe & Central Asia 9.71
## 10 1991 Latin America & Caribbean 2.55
## # … with 165 more rows
# Stacked area chart of the regional GDP totals in `plot2` over time,
# one fill colour per region.
altgraph <- ggplot(plot2, aes(x = year, y = sum, fill = region)) +
  # Areas separated by a white outline
  geom_area(color = "white") +
  # Title and axis labels
  ggtitle("GDP by World Bank Region") +
  ylab("GDP ($ trillion)") +
  xlab("Year") +
  # ColorBrewer palette for the region fills
  scale_fill_brewer(palette = "Set2") +
  theme(
    # White panel background
    panel.background = element_rect(fill = "white", color = "white"),
    # Gray grid lines; `linewidth` replaces the `size` argument of
    # element_line(), which is deprecated as of ggplot2 3.4.0.
    panel.grid.major = element_line(
      linewidth = 0.5, linetype = "solid", color = "#d0d0d0"
    ),
    panel.grid.minor = element_line(
      linewidth = 0.5, linetype = "solid", color = "#d0d0d0"
    )
  )
# Render the chart.
altgraph