library(tidyverse) # I just noticed tidyverse is needed everywhere!!
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Folder that holds the data file. The full path is built here instead of
# calling setwd(), so the working directory of the session is left untouched.
data_dir <- "C:/Users/Ma Family/Documents/R/DATA110/week6"
# Read the nations table from nations.csv in that folder.
nations <- read_csv(file.path(data_dir, "nations.csv"))
## Rows: 5275 Columns: 10
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
library(ggplot2)
# Add a total-GDP column: per-capita GDP times population, divided by 1e12 so
# the values are expressed in trillions. Then display the augmented table.
nations <- mutate(nations, gdp = gdp_percap * population / 1e12)
nations
## # A tibble: 5,275 x 11
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## 7 AD AND Andorra 2004 NA 78337 10.9 2
## 8 AD AND Andorra 2010 NA 84419 9.8 1.7
## 9 AD AND Andorra 2001 NA 67770 11.8 2.1
## 10 AD AND Andorra 2002 NA 71046 11.2 2.1
## # ... with 5,265 more rows, and 3 more variables: region <chr>, income <chr>,
## # gdp <dbl>
# names(nations)
⇨ Next, I select four countries: three from East Asia, plus the United States.
#a.usa <- nations[grep("tates", nations$country), ] # find a string including particular letters in data frame.
# Keep only the rows for the four countries of interest. `%in%` tests
# membership in the whole set at once, replacing a chain of `==` comparisons
# joined by `|`.
four <- nations %>%
  filter(
    country %in% c("Korea, Rep.", "China", "Japan", "United States")
  )
# Row count per country, to confirm that every name matched.
table(four$country)
##
## China Japan Korea, Rep. United States
## 25 25 25 25
# Column names of the filtered table.
names(four)
## [1] "iso2c" "iso3c" "country"
## [4] "year" "gdp_percap" "population"
## [7] "birth_rate" "neonat_mortal_rate" "region"
## [10] "income" "gdp"
⇨ I check whether there are any NAs in the data frame.
# Count the missing values across all columns of `four`. The generic is.na()
# dispatches on the object's class, so the data-frame method does not need to
# be called by its internal name.
sum(is.na(four))
## [1] 0
⇨ First, create a new dataframe grouped by country
# Grouped copy of the four-country table, with `country` as the grouping
# variable, followed by a display of the result.
four_gdp <- group_by(four, country)
four_gdp
## # A tibble: 100 x 11
## # Groups: country [4]
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CN CHN China 1992 1260. 1164970000 18.3 29.4
## 2 CN CHN China 2005 5053. 1303720000 12.4 14
## 3 CN CHN China 2000 2915. 1262645000 14.0 21.2
## 4 CN CHN China 1991 1091. 1150780000 19.7 29.7
## 5 CN CHN China 2013 12219. 1357380000 12.1 6.3
## 6 CN CHN China 1999 2650. 1252735000 14.6 22.2
## 7 CN CHN China 2014 13255. 1364270000 12.4 5.9
## 8 CN CHN China 2003 3934. 1288400000 12.4 17.1
## 9 CN CHN China 2004 4423. 1296075000 12.3 15.5
## 10 CN CHN China 1993 1453. 1178440000 18.1 28.8
## # ... with 90 more rows, and 3 more variables: region <chr>, income <chr>,
## # gdp <dbl>
⇨ Second, make an interactive scatter plot using ggplot() and ggplotly()
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.5
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# GDP over time for the four countries, drawn as lines with point markers and
# one colour per country.
p <- ggplot(data = four_gdp, aes(x = year, y = gdp, col = country)) +
  geom_line() +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  # Title and axis labels.
  labs(
    title = "China's Rise to Become the Largest Economy",
    x = "Year",
    y = "GDP ($trillion)"
  ) +
  # Sizes and colours of the title, the axis titles and the tick labels.
  theme(
    plot.title = element_text(size = 18, color = "steelblue", lineheight = 1.2),
    axis.title.x = element_text(size = 13, color = "salmon"),
    axis.title.y = element_text(size = 13, color = "salmon"),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  )
# Turn the static ggplot object into an interactive plotly chart.
ggplotly(p)
# GDP per capita of each selected country in 2014.
four %>%
  filter(year == 2014) %>%
  select(country, gdp_percap) %>%
  head()
## # A tibble: 4 x 2
## country gdp_percap
## <chr> <dbl>
## 1 China 13255.
## 2 Japan 36577.
## 3 Korea, Rep. 33417.
## 4 United States 54398.
⇨ Korea’s total GDP looks as if it has increased very little over these 25 years. However, Korea’s GDP per capita has increased a lot: it is now around 2.5 times China’s and is approaching Japan’s. Korea’s population is relatively small, though, and we can see that its economy relies largely on other countries’ economies, especially China’s.
# Total GDP for every region/year pair; missing GDP values are skipped in the
# sum.
regions <- summarise(
  group_by(nations, region, year),
  GDP = sum(gdp, na.rm = TRUE)
)
## `summarise()` has grouped output by 'region'. You can override using the
## `.groups` argument.
regions
## # A tibble: 175 x 3
## # Groups: region [7]
## region year GDP
## <chr> <dbl> <dbl>
## 1 East Asia & Pacific 1990 5.52
## 2 East Asia & Pacific 1991 6.03
## 3 East Asia & Pacific 1992 6.50
## 4 East Asia & Pacific 1993 7.04
## 5 East Asia & Pacific 1994 7.64
## 6 East Asia & Pacific 1995 8.29
## 7 East Asia & Pacific 1996 8.96
## 8 East Asia & Pacific 1997 9.55
## 9 East Asia & Pacific 1998 9.60
## 10 East Asia & Pacific 1999 10.1
## # ... with 165 more rows
# Area chart of GDP by region over time; `fill` gives each region its own
# area colour.
ggplot(regions, aes(x = year, y = GDP, fill = region)) +
  # `color` in geom_area() sets the outline colour, not the area colour.
  geom_area(color = "white", size = 0.2, alpha = 0.75) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "GDP by World Bank Region", y = "GDP ($trillion)")
⇨ Let’s try to apply a different themed appearance to the chart.
library(viridis)
## Warning: package 'viridis' was built under R version 4.0.5
## Loading required package: viridisLite
## Warning: package 'viridisLite' was built under R version 4.0.5
library(hrbrthemes)
## Warning: package 'hrbrthemes' was built under R version 4.0.5
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
# Same regional GDP area chart, restyled with the viridis fill scale and the
# hrbrthemes `theme_ipsum()` theme.
ggplot(data = regions, aes(x = year, y = GDP, fill = region)) +
  geom_area(color = "white", alpha = 0.6, size = 0.5) +
  # `region` is categorical, so request the discrete palette. Spell out TRUE:
  # the shorthand `T` is an ordinary variable that can be reassigned.
  scale_fill_viridis(discrete = TRUE) +
  theme_ipsum() +
  labs(title = "GDP by World Bank Region", y = "GDP($trillion)")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database