w7_homework

Load the libraries and the nations dataset.

library(readr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
# setwd("C:/Users/dkim174/Documents/Classes/Data 110/Datasets")
# Read the nations dataset from the current working directory into a tibble.
nationsgdp <- read_csv(file = "nations.csv")

## Rows: 5275 Columns: 10

## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Show the column names, types, and first few values of the loaded data.
nationsgdp %>% str()

## spec_tbl_df [5,275 x 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ iso2c             : chr [1:5275] "AD" "AD" "AD" "AD" ...
##  $ iso3c             : chr [1:5275] "AND" "AND" "AND" "AND" ...
##  $ country           : chr [1:5275] "Andorra" "Andorra" "Andorra" "Andorra" ...
##  $ year              : num [1:5275] 1996 1994 2003 1990 2009 ...
##  $ gdp_percap        : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
##  $ population        : num [1:5275] 64291 62707 74783 54511 85474 ...
##  $ birth_rate        : num [1:5275] 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
##  $ neonat_mortal_rate: num [1:5275] 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
##  $ region            : chr [1:5275] "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
##  $ income            : chr [1:5275] "High income" "High income" "High income" "High income" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   iso2c = col_character(),
##   ..   iso3c = col_character(),
##   ..   country = col_character(),
##   ..   year = col_double(),
##   ..   gdp_percap = col_double(),
##   ..   population = col_double(),
##   ..   birth_rate = col_double(),
##   ..   neonat_mortal_rate = col_double(),
##   ..   region = col_character(),
##   ..   income = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

Draw the first chart.

Inspect the data before using `mutate()`.

# Work on a copy so the freshly loaded `nationsgdp` stays untouched.
data <- nationsgdp
# List the available columns before deriving new ones.
names(data)

##  [1] "iso2c"              "iso3c"              "country"           
##  [4] "year"               "gdp_percap"         "population"        
##  [7] "birth_rate"         "neonat_mortal_rate" "region"            
## [10] "income"

# Add a `gdp` column holding total GDP in trillions.
# Total GDP is GDP per capita multiplied by population (not divided by it);
# dividing by 10^12 rescales the result to trillions.
data <- mutate(data, gdp = (gdp_percap * population) / 10^12)

Filter four countries (South Korea, Albania, India, and Oman) from the data.

# Keep only the rows belonging to the four countries plotted in the first chart.
selected_countries <- c("Korea, Rep.", "Albania", "India", "Oman")
filtered_data <- filter(data, country %in% selected_countries)
# Preview the first rows of the subset.
head(filtered_data)

## # A tibble: 6 x 11
##   iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
##   <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
## 1 AL    ALB   Albania  2004      5653.    3026939       13.0                9.8
## 2 AL    ALB   Albania  2002      4786.    3051010       14.6               10.7
## 3 AL    ALB   Albania  1994      2414.    3207536       21.9               12.9
## 4 AL    ALB   Albania  2009      9342.    2927519       11.7                7.9
## 5 AL    ALB   Albania  2003      5182.    3039616       13.8               10.3
## 6 AL    ALB   Albania  1999      3685.    3108778       17.3               11.7
## # ... with 3 more variables: region <chr>, income <chr>, gdp <dbl>

Plot with points and lines.

# First chart: GDP over time, one coloured point-and-line series per country.
p1 <- ggplot(
  data = filtered_data,
  mapping = aes(x = year, y = gdp, color = country)
) +
  ggtitle("Four Asian countries By GDP") +
  geom_point() +
  geom_line() +
  theme_bw() +
  # "country" is the legend title; the palette itself is the Brewer default.
  scale_color_brewer(name = "country")
p1

Draw the second chart.

# Total GDP per region and year for the second chart.
# Aggregate over the full `data` table: summing the four-country
# `filtered_data` subset would make each "region" a single country.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() no longer prints its grouping message.
grouped_data <- data %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp, na.rm = TRUE), .groups = "drop")

Create a geom area plot with fill.

# Second chart: stacked area plot of GDP over time, filled by region.
p2 <- grouped_data %>%
  ggplot(aes(x = year, y = GDP, fill = region)) +
  ggtitle("GDP by Grouped Region") +
  geom_area() +
  theme_bw() +
  # Brewer palette names are case-sensitive: "Set2" is valid, "set2" is not
  # and triggers an "Unknown palette" warning.
  scale_fill_brewer(palette = "Set2")

p2

Create a geom area plot with fill in region and scaled color.

# Variant of the area chart that attaches a Brewer colour (outline) scale.
p3 <- grouped_data %>%
  ggplot(aes(x = year, y = GDP, fill = region)) +
  ggtitle("GDP by Grouped Region") +
  geom_area() +
  theme_bw() +
  # Brewer palette names are case-sensitive: "Set2" is valid, "set2" is not
  # and triggers an "Unknown palette" warning.
  # NOTE(review): only `fill` is mapped above, so this colour scale has no
  # visible effect; use scale_fill_brewer() or map `color` if the palette
  # is meant to show -- confirm the intent.
  scale_color_brewer(palette = "Set2")

p3

w7_homework

Dong-Min Kim

10/15/2021

Draw the first chart.

Draw the second chart.