Week 6 Homework Assignment

Author

Leika Ray Joseph

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Folder that holds the course data. Building the full path with file.path()
# avoids calling setwd(), which changes the session's working directory as a
# side effect.
data_dir <- "/Users/leikarayjoseph/Desktop/Data 110"

# Read the nations data set into a tibble named `nations`.
nations <- read_csv(file.path(data_dir, "nations.csv"))

Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Use read_csv() to load my CSV file into R and name it nations. (I originally noted using read.csv() because the file would not render with read_csv(), but the code above uses read_csv().)

# Add a `gdp` column (GDP per capita times population, divided by 10^12 as
# the assignment instructs), then keep only the columns used by the charts.
nations1 <- nations |>
  mutate(gdp = (gdp_percap * population) / 10^12) |>
  select(country, population, gdp_percap, year, region, gdp)

nations1

# A tibble: 5,275 × 6
   country population gdp_percap  year region                  gdp
   <chr>        <dbl>      <dbl> <dbl> <chr>                 <dbl>
 1 Andorra      64291         NA  1996 Europe & Central Asia    NA
 2 Andorra      62707         NA  1994 Europe & Central Asia    NA
 3 Andorra      74783         NA  2003 Europe & Central Asia    NA
 4 Andorra      54511         NA  1990 Europe & Central Asia    NA
 5 Andorra      85474         NA  2009 Europe & Central Asia    NA
 6 Andorra      82326         NA  2011 Europe & Central Asia    NA
 7 Andorra      78337         NA  2004 Europe & Central Asia    NA
 8 Andorra      84419         NA  2010 Europe & Central Asia    NA
 9 Andorra      67770         NA  2001 Europe & Central Asia    NA
10 Andorra      71046         NA  2002 Europe & Central Asia    NA
# ℹ 5,265 more rows

# Keep only the four countries shown in the first chart. The rows are grouped
# by country and year before filtering, so the result stays grouped that way.
nations2 <- nations1 |>
  group_by(country, year) |>
  filter(
    country %in% c("China", "Germany", "Japan", "United States")
  )

nations2

# A tibble: 100 × 6
# Groups:   country, year [100]
   country population gdp_percap  year region                gdp
   <chr>        <dbl>      <dbl> <dbl> <chr>               <dbl>
 1 China   1164970000      1260.  1992 East Asia & Pacific  1.47
 2 China   1303720000      5053.  2005 East Asia & Pacific  6.59
 3 China   1262645000      2915.  2000 East Asia & Pacific  3.68
 4 China   1150780000      1091.  1991 East Asia & Pacific  1.26
 5 China   1357380000     12219.  2013 East Asia & Pacific 16.6 
 6 China   1252735000      2650.  1999 East Asia & Pacific  3.32
 7 China   1364270000     13255.  2014 East Asia & Pacific 18.1 
 8 China   1288400000      3934.  2003 East Asia & Pacific  5.07
 9 China   1296075000      4423.  2004 East Asia & Pacific  5.73
10 China   1178440000      1453.  1993 East Asia & Pacific  1.71
# ℹ 90 more rows

Chart1

# Chart 1: reproduce the chart from the assignment document using the same
# four countries, with one colored line plus point markers per country.
plot1 <- ggplot(nations2, aes(x = year, y = gdp, color = country)) +
  geom_line() +
  geom_point() +
  labs(
    title = "China's Rise to Become the Largest Economy",
    x = "year",
    y = "GDP($ trillion)"
  ) +
  # ColorBrewer "Set1" palette for the country colors.
  scale_color_brewer(palette = "Set1") +
  theme_bw()

plot1

Chart2

# Total GDP per region for each year. na.rm = TRUE skips missing GDP values
# so that a single NA does not turn a whole region-year sum into NA.
nations3 <- nations1 |>
  group_by(region, year) |>
  summarise(gdp_dollar = sum(gdp, na.rm = TRUE))

`summarise()` has grouped output by 'region'. You can override using the
`.groups` argument.

# Print the per-region, per-year GDP totals to check them.
nations3

# A tibble: 175 × 3
# Groups:   region [7]
   region               year gdp_dollar
   <chr>               <dbl>      <dbl>
 1 East Asia & Pacific  1990       5.52
 2 East Asia & Pacific  1991       6.03
 3 East Asia & Pacific  1992       6.50
 4 East Asia & Pacific  1993       7.04
 5 East Asia & Pacific  1994       7.64
 6 East Asia & Pacific  1995       8.29
 7 East Asia & Pacific  1996       8.96
 8 East Asia & Pacific  1997       9.55
 9 East Asia & Pacific  1998       9.60
10 East Asia & Pacific  1999      10.1 
# ℹ 165 more rows

# Chart 2: area chart of summed GDP by region over time, filled by region
# with white outlines between the areas.
plot2 <- ggplot(nations3, aes(x = year, y = gdp_dollar, fill = region)) +
  geom_area(color = "white") +
  # ColorBrewer "Set2" palette for the region fills.
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP by World Bank Region",
    x = "year",
    y = "GDP($ trillion)"
  ) +
  theme_bw()

plot2