Load the libraries and the nations dataset.
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# setwd("C:/Users/dkim174/Documents/Classes/Data 110/Datasets")
# Read the nations CSV from the current working directory.
nationsgdp <- read_csv(file = "nations.csv")
## Rows: 5275 Columns: 10
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a compact summary of the columns and their types.
str(object = nationsgdp)
## spec_tbl_df [5,275 x 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ iso2c : chr [1:5275] "AD" "AD" "AD" "AD" ...
## $ iso3c : chr [1:5275] "AND" "AND" "AND" "AND" ...
## $ country : chr [1:5275] "Andorra" "Andorra" "Andorra" "Andorra" ...
## $ year : num [1:5275] 1996 1994 2003 1990 2009 ...
## $ gdp_percap : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
## $ population : num [1:5275] 64291 62707 74783 54511 85474 ...
## $ birth_rate : num [1:5275] 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
## $ neonat_mortal_rate: num [1:5275] 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
## $ region : chr [1:5275] "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
## $ income : chr [1:5275] "High income" "High income" "High income" "High income" ...
## - attr(*, "spec")=
## .. cols(
## .. iso2c = col_character(),
## .. iso3c = col_character(),
## .. country = col_character(),
## .. year = col_double(),
## .. gdp_percap = col_double(),
## .. population = col_double(),
## .. birth_rate = col_double(),
## .. neonat_mortal_rate = col_double(),
## .. region = col_character(),
## .. income = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
Explore the data before using mutate.
# Bind the loaded table to a working name used by the following steps.
data <- nationsgdp
# Show the available column names.
names(data)
## [1] "iso2c" "iso3c" "country"
## [4] "year" "gdp_percap" "population"
## [7] "birth_rate" "neonat_mortal_rate" "region"
## [10] "income"
# Add a `gdp` column holding total GDP in trillions: per-capita GDP
# multiplied by population, scaled by 10^12. (Dividing per-capita GDP by
# population does not yield total GDP.)
data <- mutate(data, gdp = gdp_percap * population / 10^12)
Filter four countries from the data: South Korea, India, and Oman in Asia, plus Albania in Europe.
# Keep only the rows for the four countries being compared. `%in%` tests
# membership in the set directly instead of chaining `==` with `|`.
# Note: Albania is in Europe, so this is not a purely Asian selection.
filtered_data <- data %>%
  filter(country %in% c("Korea, Rep.", "Albania", "India", "Oman"))
# Preview the first rows of the filtered table.
head(filtered_data)
## # A tibble: 6 x 11
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AL ALB Albania 2004 5653. 3026939 13.0 9.8
## 2 AL ALB Albania 2002 4786. 3051010 14.6 10.7
## 3 AL ALB Albania 1994 2414. 3207536 21.9 12.9
## 4 AL ALB Albania 2009 9342. 2927519 11.7 7.9
## 5 AL ALB Albania 2003 5182. 3039616 13.8 10.3
## 6 AL ALB Albania 1999 3685. 3108778 17.3 11.7
## # ... with 3 more variables: region <chr>, income <chr>, gdp <dbl>
Plot with points and lines.
# Point-and-line chart of `gdp` by year, one colour per country.
# - The title no longer says "Asian": Albania is a European country.
# - The legend title is passed as `name =` rather than positionally.
# - A qualitative palette is used because `country` is categorical; the
#   default sequential "Blues" palette draws one country almost white.
p1 <- filtered_data %>%
  ggplot(aes(x = year, y = gdp, color = country)) +
  ggtitle("GDP of Four Countries") +
  geom_point() +
  geom_line() +
  theme_bw() +
  scale_color_brewer(name = "country", palette = "Set1")
p1
# Sum `gdp` for every region/year combination, ignoring missing values.
# `.groups = "drop"` returns an ungrouped tibble, so later steps do not
# inherit a leftover `region` grouping, and it silences the summarise()
# message about grouped output.
grouped_data <- filtered_data %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp, na.rm = TRUE), .groups = "drop")
Create a geom area plot with fill.
# Area chart of summed GDP by year, filled by region.
# ColorBrewer palette names are case-sensitive: "Set2" is valid, while
# "set2" triggers an "Unknown palette" warning.
p2 <- grouped_data %>%
  ggplot(aes(x = year, y = GDP, fill = region)) +
  ggtitle("GDP by Grouped Region") +
  geom_area() +
  theme_bw() +
  scale_fill_brewer(palette = "Set2")
p2
Create an area plot filled by region, with a scaled color.
# Area chart of summed GDP by year, filled by region, with the outline
# colour also mapped to region so that the colour scale has an effect.
# (A colour scale does nothing when only `fill` is mapped.)
# ColorBrewer palette names are case-sensitive: "Set2", not "set2".
p3 <- grouped_data %>%
  ggplot(aes(x = year, y = GDP, fill = region, colour = region)) +
  ggtitle("GDP by Grouped Region") +
  geom_area() +
  theme_bw() +
  scale_color_brewer(palette = "Set2")
p3