Nations Homework

Author

Karen Pesca

1. Load the packages and the dataset.
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Point R at a CRAN mirror so package installation works in a
# non-interactive render.
options(repos = c(CRAN = "https://cran.rstudio.com"))

# Install plotly only when it is missing, so re-running or re-rendering this
# document does not download and reinstall the package every time.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}

The downloaded binary packages are in
    /var/folders/9f/_ykbhh0579j69vwc3rt8ryl40000gn/T//Rtmp5Zp0kp/downloaded_packages
library(plotly)

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
# Read the data through an explicit path instead of calling setwd(), so the
# session's working directory is left untouched.
# NOTE(review): this directory is machine-specific; consider keeping
# nations.csv next to this document and reading it with a relative path.
data_dir <- "/Users/karenlizethpp/Library/Mobile Documents/com~apple~CloudDocs/Data 110"
nations <- read_csv(file.path(data_dir, "nations.csv"))
Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
2. Check the structure of the dataset.
# Preview the first rows of the raw data to check column names and types.
nations %>% head()
# A tibble: 6 × 10
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
3 AD    AND   Andorra  2003         NA      74783       10.3                2  
4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
# ℹ 2 more variables: region <chr>, income <chr>
3. Use mutate() to create the GDP variable in trillions.
# Total GDP = GDP per capita x population, rescaled to trillions.
# Rows with a missing gdp_percap yield NA for gdp_tri.
nationsgdp <- mutate(nations, gdp_tri = gdp_percap * population / 1e12)

# Confirm the new gdp_tri column was added.
head(nationsgdp)
# A tibble: 6 × 11
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
3 AD    AND   Andorra  2003         NA      74783       10.3                2  
4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
# ℹ 3 more variables: region <chr>, income <chr>, gdp_tri <dbl>
4. Filter the data for the four desired countries.
# Countries compared in Chart 1.
sel_countries <- c("United States", "China", "Germany", "Colombia")

# Keep only the rows whose country is one of the selected four.
fil_nations <- filter(nationsgdp, country %in% sel_countries)
5. Chart 1: GDP over time for the four selected countries.
# Chart 1: GDP (in trillions) by year for the four selected countries,
# drawn as points joined by a line, one color per country.
p1 <- ggplot(fil_nations, aes(x = year, y = gdp_tri, color = country)) +
  geom_point(size = 2) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and raises a warning.
  geom_line(linewidth = 1) +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "China's Rise to Become the Largest Economy",
    x = "Year",
    y = "GDP ($ Trillion)",
    color = "Country"
  ) +
  theme_minimal()
# Convert the static ggplot into an interactive plotly chart, then display it.
p1 <- ggplotly(p1)
p1
6. Chart 2: Total GDP by World Bank region over time.
# Total GDP (in trillions) for every region/year combination.
# na.rm = TRUE skips countries whose GDP is missing for that year.
# .groups = "drop" returns an ungrouped tibble, so later verbs are not
# silently applied per region, and it silences the regrouping message.
nations_region <- nationsgdp %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp_tri, na.rm = TRUE), .groups = "drop")
# Chart 2: stacked area chart of total GDP per region by year.
p2 <- ggplot(nations_region, aes(x = year, y = GDP, fill = region)) +
  # A thin white outline separates the stacked regions. Outline width is set
  # with `linewidth`; `size` for line widths is deprecated since ggplot2 3.4.0.
  geom_area(color = "white", linewidth = 0.2, alpha = 0.9) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP by World Bank Region",
    x = "Year",
    y = "GDP ($ Trillion)",
    fill = "Region"
  ) +
  theme_minimal()
p2