Assignment 6

Author

Angel Porter

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.2     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggfortify) 
library(htmltools) 
library(plotly)


Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout

library(dplyr)

# Print the current working directory; relative file paths used when reading
# data are resolved against this folder.
getwd()

[1] "C:/Users/angel/OneDrive/Desktop"

# List the contents of the working directory to confirm that the data file
# is present before trying to read it.
list.files()

 [1] "AALAS training"                       
 [2] "AngelPorter_resume_docx (2).docx"     
 [3] "assignment-6-final.rmarkdown"         
 [4] "assignment-6.rmarkdown"               
 [5] "assignment-6_files"                   
 [6] "assignment 6 final.qmd"               
 [7] "assignment 6 final.rmarkdown"         
 [8] "assignment 6.html"                    
 [9] "assignment 6.qmd"                     
[10] "assignment 6.rmarkdown"               
[11] "assignment 6_files"                   
[12] "Data110"                              
[13] "desktop.ini"                          
[14] "Discord.lnk"                          
[15] "Guardian Browser.lnk"                 
[16] "Myfirstworddocument.docx"             
[17] "nations.csv"                          
[18] "need proof of insurance.pptx"         
[19] "Python Bootcamp for Data Analytics"   
[20] "rsconnect"                            
[21] "Sociology 100"                        
[22] "Training Links"                       
[23] "Training material on CITI PROGRAM.org"
[24] "Zoom Workplace.lnk"

# Confirm non-interactively that the data file is in the working directory.
# file.choose() opens a file-picker dialog (its argument is the logical `new`,
# not a file name), which makes rendering depend on a manual selection, and
# its result was never used.
file.exists("nations.csv")

[1] TRUE

# Read the nations data using a path relative to the working directory (the
# folder that holds this document), so the report does not depend on one
# user's absolute Desktop path.
nations <- read_csv("nations.csv")

Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Preview the first six rows of the nations data (source: nations.csv).
head(nations)

# A tibble: 6 × 10
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
3 AD    AND   Andorra  2003         NA      74783       10.3                2  
4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
# ℹ 2 more variables: region <chr>, income <chr>

# View() opens the interactive data viewer, so only call it when working
# interactively; it is skipped when the document is rendered.
if (interactive()) {
  View(nations)
}

# Print the structure of the data: column names, types and sample values.
str(nations)

spc_tbl_ [5,275 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ iso2c             : chr [1:5275] "AD" "AD" "AD" "AD" ...
 $ iso3c             : chr [1:5275] "AND" "AND" "AND" "AND" ...
 $ country           : chr [1:5275] "Andorra" "Andorra" "Andorra" "Andorra" ...
 $ year              : num [1:5275] 1996 1994 2003 1990 2009 ...
 $ gdp_percap        : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
 $ population        : num [1:5275] 64291 62707 74783 54511 85474 ...
 $ birth_rate        : num [1:5275] 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
 $ neonat_mortal_rate: num [1:5275] 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
 $ region            : chr [1:5275] "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
 $ income            : chr [1:5275] "High income" "High income" "High income" "High income" ...
 - attr(*, "spec")=
  .. cols(
  ..   iso2c = col_character(),
  ..   iso3c = col_character(),
  ..   country = col_character(),
  ..   year = col_double(),
  ..   gdp_percap = col_double(),
  ..   population = col_double(),
  ..   birth_rate = col_double(),
  ..   neonat_mortal_rate = col_double(),
  ..   region = col_character(),
  ..   income = col_character()
  .. )
 - attr(*, "problems")=<externalptr>

# Summary statistics for every column, including the NA count of each
# numeric variable.
summary(nations)

    iso2c              iso3c             country               year     
 Length:5275        Length:5275        Length:5275        Min.   :1990  
 Class :character   Class :character   Class :character   1st Qu.:1996  
 Mode  :character   Mode  :character   Mode  :character   Median :2002  
                                                          Mean   :2002  
                                                          3rd Qu.:2008  
                                                          Max.   :2014  
                                                                        
   gdp_percap         population          birth_rate    neonat_mortal_rate
 Min.   :   239.7   Min.   :9.004e+03   Min.   : 6.90   Min.   : 0.70     
 1st Qu.:  2263.6   1st Qu.:7.175e+05   1st Qu.:13.40   1st Qu.: 6.70     
 Median :  6563.2   Median :5.303e+06   Median :21.60   Median :15.00     
 Mean   : 12788.8   Mean   :2.958e+07   Mean   :24.16   Mean   :19.40     
 3rd Qu.: 17195.0   3rd Qu.:1.757e+07   3rd Qu.:33.88   3rd Qu.:29.48     
 Max.   :141968.1   Max.   :1.364e+09   Max.   :55.12   Max.   :73.10     
 NA's   :766        NA's   :14          NA's   :295     NA's   :525       
    region             income         
 Length:5275        Length:5275       
 Class :character   Class :character  
 Mode  :character   Mode  :character

# Step 1: Create the new GDP variable for the entire dataset.
# Total GDP is GDP per capita times population; dividing by 1e12 expresses it
# in trillions of dollars.
nations <- nations %>%
  mutate(gdp = gdp_percap * population / 1e12)

# ---------------------------------------------------------
# Chart 1: Four-Country Trend
# ---------------------------------------------------------

# Keep only the rows for the four countries to be compared.
chart1_data <- nations %>%
  filter(country %in% c("United States", "China", "Germany", "Japan"))

# View() is an interactive viewer; skip it when the document is rendered.
if (interactive()) {
  View(chart1_data)
}

# Draw Chart 1: one coloured line (with points) per country over time.
ggplot(data = chart1_data, mapping = aes(x = year, y = gdp, color = country)) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "GDP of Four Countries over Time",
    x = "Year",
    y = "GDP (in Trillions of Dollars)"
  )

# ---------------------------------------------------------
# Chart 2: Total GDP by Region
# ---------------------------------------------------------

# Step 1: Compute GDP in trillions and total it per region and year in one
# pipeline. The gdp column is recomputed here so this step does not depend on
# any earlier modification of `nations`.
# na.rm = TRUE ignores rows with missing gdp_percap or population when summing.
# .groups = "drop" returns an ungrouped tibble, so no grouping by region is
# left behind to affect later operations on chart2_data.
chart2_data <- nations %>%
  mutate(gdp = gdp_percap * population / 1e12) %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp, na.rm = TRUE), .groups = "drop")

# View() is an interactive viewer; skip it when the document is rendered.
if (interactive()) {
  View(chart2_data)
}

# Step 2: Draw the stacked area chart with ggplot2; the thin white outline
# separates adjacent regions.
ggplot(data = chart2_data, mapping = aes(x = year, y = GDP, fill = region)) +
  geom_area(color = "white", linewidth = 0.2) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Total GDP by Region over Time",
    y = "GDP (Trillions of Dollars)",
    x = "Year"
  )