Nations HW

Author

Michael Desir

libraries

library(tidyverse)
library(dplyr)
library(RColorBrewer)
library(plotly)

datasets

# Read the nations data from an explicit path rather than calling setwd(),
# so the session's working directory is never changed as a side effect.
data_dir <- "C:/Users/desir_7411ic3/Desktop/Montgomery College/DATA110/DATASETS-20240830T194929Z-001/DATASETS"
nations_orig <- read_csv(file.path(data_dir, "nations.csv"))
Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the raw nations table.
nations_orig |> head()
# A tibble: 6 × 10
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
3 AD    AND   Andorra  2003         NA      74783       10.3                2  
4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
# ℹ 2 more variables: region <chr>, income <chr>
# Per-column summary (ranges, quartiles, NA counts) of the raw table.
nations_orig |> summary()
    iso2c              iso3c             country               year     
 Length:5275        Length:5275        Length:5275        Min.   :1990  
 Class :character   Class :character   Class :character   1st Qu.:1996  
 Mode  :character   Mode  :character   Mode  :character   Median :2002  
                                                          Mean   :2002  
                                                          3rd Qu.:2008  
                                                          Max.   :2014  
                                                                        
   gdp_percap         population          birth_rate    neonat_mortal_rate
 Min.   :   239.7   Min.   :9.004e+03   Min.   : 6.90   Min.   : 0.70     
 1st Qu.:  2263.6   1st Qu.:7.175e+05   1st Qu.:13.40   1st Qu.: 6.70     
 Median :  6563.2   Median :5.303e+06   Median :21.60   Median :15.00     
 Mean   : 12788.8   Mean   :2.958e+07   Mean   :24.16   Mean   :19.40     
 3rd Qu.: 17195.0   3rd Qu.:1.757e+07   3rd Qu.:33.88   3rd Qu.:29.48     
 Max.   :141968.1   Max.   :1.364e+09   Max.   :55.12   Max.   :73.10     
 NA's   :766        NA's   :14          NA's   :295     NA's   :525       
    region             income         
 Length:5275        Length:5275       
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      
                                      

GDP column

# Add total GDP in trillions: per-capita GDP times population, scaled by 1e12.
# Columns are referenced through mutate()'s data mask (not nations_orig$...),
# so the calculation always uses the rows of the data being piped in.
dataset <- nations_orig %>%
  mutate(gdp = gdp_percap * population / 1e12)
# Per-column summary of the table including the new gdp column.
dataset |> summary()
    iso2c              iso3c             country               year     
 Length:5275        Length:5275        Length:5275        Min.   :1990  
 Class :character   Class :character   Class :character   1st Qu.:1996  
 Mode  :character   Mode  :character   Mode  :character   Median :2002  
                                                          Mean   :2002  
                                                          3rd Qu.:2008  
                                                          Max.   :2014  
                                                                        
   gdp_percap         population          birth_rate    neonat_mortal_rate
 Min.   :   239.7   Min.   :9.004e+03   Min.   : 6.90   Min.   : 0.70     
 1st Qu.:  2263.6   1st Qu.:7.175e+05   1st Qu.:13.40   1st Qu.: 6.70     
 Median :  6563.2   Median :5.303e+06   Median :21.60   Median :15.00     
 Mean   : 12788.8   Mean   :2.958e+07   Mean   :24.16   Mean   :19.40     
 3rd Qu.: 17195.0   3rd Qu.:1.757e+07   3rd Qu.:33.88   3rd Qu.:29.48     
 Max.   :141968.1   Max.   :1.364e+09   Max.   :55.12   Max.   :73.10     
 NA's   :766        NA's   :14          NA's   :295     NA's   :525       
    region             income               gdp         
 Length:5275        Length:5275        Min.   : 0.0000  
 Class :character   Class :character   1st Qu.: 0.0077  
 Mode  :character   Mode  :character   Median : 0.0324  
                                       Mean   : 0.3259  
                                       3rd Qu.: 0.1849  
                                       Max.   :18.0829  
                                       NA's   :766      

First chart

create dataset for first chart

# Keep only the rows for the four economies compared in the first chart.
chart_of_4 <- filter(
  dataset,
  country %in% c("China", "Germany", "Japan", "United States")
)
# NOTE(review): this previews `dataset` (all countries), not the filtered
# `chart_of_4` assigned just before it -- confirm which table was intended.
head(dataset)
# A tibble: 6 × 11
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
3 AD    AND   Andorra  2003         NA      74783       10.3                2  
4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
# ℹ 3 more variables: region <chr>, income <chr>, gdp <dbl>

plot 1 design

# Point-and-line chart of GDP over time, one colour per selected country.
plot1 <- ggplot(chart_of_4, aes(x = year, y = gdp, color = country)) +
  geom_point() + # one marker per country-year
  geom_line() + # connect each country's markers
  scale_color_brewer(palette = "Set1") + # ColorBrewer "Set1" palette for country
  labs(
    x = "Year",
    y = "GDP ($ trillion)",
    title = "China's Rise to Become the Largest Economy",
    caption = "From World Bank Data"
  ) +
  theme_bw() + # black-and-white base theme
  theme(panel.border = element_blank()) # drop the panel outline
plot1 # print the chart

Second chart

plot 2 dataset

# Total GDP (trillions) per region and year; missing country values are
# skipped by na.rm = TRUE. `.groups = "drop"` returns an ungrouped tibble so
# no leftover `region` grouping leaks into later steps.
data2 <- dataset %>%
  group_by(region, year) %>%
  summarise(sum_GDP = sum(gdp, na.rm = TRUE), .groups = "drop")
# Per-column summary of the region-by-year GDP totals.
data2 |> summary()
    region               year         sum_GDP       
 Length:175         Min.   :1990   Min.   : 0.7865  
 Class :character   1st Qu.:1996   1st Qu.: 2.8400  
 Mode  :character   Median :2002   Median : 6.5393  
                    Mean   :2002   Mean   : 8.3982  
                    3rd Qu.:2008   3rd Qu.:11.6291  
                    Max.   :2014   Max.   :32.5209  

plot 2 design

# Stacked area chart of summed GDP over time, filled by region.
plot2 <- data2 |>
  ggplot(aes(x = year, y = sum_GDP, fill = region)) +
  geom_area(color = "white", linewidth = 0.2) + # thin white outline per area
  scale_fill_brewer(palette = "Set2") + # ColorBrewer "Set2" palette for region
  labs(
    x = "Year", # capitalised to match the first chart's axis label
    y = "GDP ($ trillion)",
    title = "GDP by World Bank Region",
    caption = "From World Bank Data"
  ) +
  theme_bw() + # black-and-white base theme
  theme(panel.border = element_blank()) # drop the panel outline
plot2 # print the chart