Week 8 - ggplot2 https://sites.google.com/site/cit137fall18/week-5

                **Assignment**
                

  1. Download the tourism dataset from the World Bank: http://data.worldbank.org/indicator/ST.INT.RCPT.CD (course copy: https://sites.google.com/site/cit137fall18/week-5/tourism_2015.csv)
  2. Plot a time series graph of tourism revenue vs year for three different countries
  3. Choose countries with different magnitudes (e.g., US and Ghana)
  4. Use a log scale to show the trends on the same graphic
  5. Use different colors for each country

PACKAGES

Load tidyr, dplyr, ggplot2, and scales.

library(tidyr)
library(dplyr)
library(ggplot2)
library(scales)
#remove scientific notation/formatting
options(scipen=999)

READ DATA INTO R

Load data into R from link provided in slides. Check structure.

# Read the World Bank tourism-receipts CSV directly from the course site.
# skip = 4 skips the first four lines of the file before the header is read;
# check.names = FALSE keeps the column names exactly as they appear in the
# file (e.g. "Country Name", "1960") instead of making them syntactic.
tour <- read.csv(
  file = "https://sites.google.com/site/cit137fall18/week-5/tourism_2015.csv",
  skip = 4,
  check.names = FALSE
)
# Inspect the column names and types of the imported data frame.
str(tour)
'data.frame':   264 obs. of  62 variables:
 $ Country Name  : Factor w/ 264 levels "Afghanistan",..: 11 5 1 6 2 8 250 9 10 4 ...
 $ Country Code  : Factor w/ 264 levels "ABW","AFG","AGO",..: 1 5 2 3 4 6 7 8 9 10 ...
 $ Indicator Name: Factor w/ 1 level "International tourism, receipts (current US$)": 1 1 1 1 1 1 1 1 1 1 ...
 $ Indicator Code: Factor w/ 1 level "ST.INT.RCPT.CD": 1 1 1 1 1 1 1 1 1 1 ...
 $ 1960          : logi  NA NA NA NA NA NA ...
 $ 1961          : logi  NA NA NA NA NA NA ...
 $ 1962          : logi  NA NA NA NA NA NA ...
 $ 1963          : logi  NA NA NA NA NA NA ...
 $ 1964          : logi  NA NA NA NA NA NA ...
 $ 1965          : logi  NA NA NA NA NA NA ...
 $ 1966          : logi  NA NA NA NA NA NA ...
 $ 1967          : logi  NA NA NA NA NA NA ...
 $ 1968          : logi  NA NA NA NA NA NA ...
 $ 1969          : logi  NA NA NA NA NA NA ...
 $ 1970          : logi  NA NA NA NA NA NA ...
 $ 1971          : logi  NA NA NA NA NA NA ...
 $ 1972          : logi  NA NA NA NA NA NA ...
 $ 1973          : logi  NA NA NA NA NA NA ...
 $ 1974          : logi  NA NA NA NA NA NA ...
 $ 1975          : logi  NA NA NA NA NA NA ...
 $ 1976          : logi  NA NA NA NA NA NA ...
 $ 1977          : logi  NA NA NA NA NA NA ...
 $ 1978          : logi  NA NA NA NA NA NA ...
 $ 1979          : logi  NA NA NA NA NA NA ...
 $ 1980          : logi  NA NA NA NA NA NA ...
 $ 1981          : logi  NA NA NA NA NA NA ...
 $ 1982          : logi  NA NA NA NA NA NA ...
 $ 1983          : logi  NA NA NA NA NA NA ...
 $ 1984          : logi  NA NA NA NA NA NA ...
 $ 1985          : logi  NA NA NA NA NA NA ...
 $ 1986          : logi  NA NA NA NA NA NA ...
 $ 1987          : logi  NA NA NA NA NA NA ...
 $ 1988          : logi  NA NA NA NA NA NA ...
 $ 1989          : logi  NA NA NA NA NA NA ...
 $ 1990          : logi  NA NA NA NA NA NA ...
 $ 1991          : logi  NA NA NA NA NA NA ...
 $ 1992          : logi  NA NA NA NA NA NA ...
 $ 1993          : logi  NA NA NA NA NA NA ...
 $ 1994          : logi  NA NA NA NA NA NA ...
 $ 1995          : num  554000000 NA NA 27000000 70400000 ...
 $ 1996          : num  666000000 NA NA 38000000 93800000 ...
 $ 1997          : num  726000000 NA NA 24000000 33600000 ...
 $ 1998          : num  786000000 NA NA 39000000 60230000 ...
 $ 1999          : num  782000000 NA NA 31000000 218000000 ...
 $ 2000          : num  850000000 NA NA 34000000 398000000 ...
 $ 2001          : num  825000000 NA NA 35000000 451000000 ...
 $ 2002          : num  835000000 NA NA 51000000 492000000 ...
 $ 2003          : num  858100000 NA NA 63000000 537000000 ...
 $ 2004          : num  1056000000 NA NA 82000000 756000000 ...
 $ 2005          : num  1097000000 NA NA 103000000 880000000 ...
 $ 2006          : num  1064100000 NA NA 91000000 1057000000 ...
 $ 2007          : num  1213400000 NA NA 236000000 1479000000 ...
 $ 2008          : num  1352300000 NA 45000000 293000000 1848000000 ...
 $ 2009          : num  1223200000 NA 87000000 554000000 2014000000 ...
 $ 2010          : num  1254100000 NA 167000000 726000000 1780000000 ...
 $ 2011          : num  1357600000 NA 147000000 653000000 1833000000 ...
 $ 2012          : num  1412100000 NA 168000000 711000000 1623000000 ...
 $ 2013          : num  1510800000 NA 154000000 1241000000 1670000000 ...
 $ 2014          : num  1631900000 NA 91000000 1597000000 1849000000 ...
 $ 2015          : logi  NA NA NA NA NA NA ...
 $ 2016          : logi  NA NA NA NA NA NA ...
 $               : logi  NA NA NA NA NA NA ...

CLEAN UP WITH TIDYR

# Reshape the wide World Bank table into one row per country and year:
#   * drop columns 2-4 (`Country Code`, `Indicator Name`, `Indicator Code`)
#     and column 62, which has an empty name;
#   * gather the year columns `1960`..`2016` into Year / receipts pairs,
#     discarding rows whose receipts are NA;
#   * sort by country, then by year.
tour_tidy <- tour %>%
  select(-(2:4), -62) %>%
  gather(
    key = Year,
    value = Intl_Tourism_Rcpt_USD,
    `1960`:`2016`,
    na.rm = TRUE
  ) %>%
  arrange(`Country Name`, Year)
str(tour_tidy)
'data.frame':   4542 obs. of  3 variables:
 $ Country Name         : Factor w/ 264 levels "Afghanistan",..: 1 1 1 1 1 1 1 2 2 2 ...
 $ Year                 : chr  "2008" "2009" "2010" "2011" ...
 $ Intl_Tourism_Rcpt_USD: num  45000000 87000000 167000000 147000000 168000000 154000000 91000000 70400000 93800000 33600000 ...
# Peek at the first and last rows of the reshaped data.
head(tour_tidy)
tail(tour_tidy)
# Convert to a tibble. as_tibble() replaces tbl_df(), which is deprecated
# in dplyr; the resulting object is the same.
tour_tidy <- as_tibble(tour_tidy)
str(tour_tidy)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   4542 obs. of  3 variables:
 $ Country Name         : Factor w/ 264 levels "Afghanistan",..: 1 1 1 1 1 1 1 2 2 2 ...
 $ Year                 : chr  "2008" "2009" "2010" "2011" ...
 $ Intl_Tourism_Rcpt_USD: num  45000000 87000000 167000000 147000000 168000000 154000000 91000000 70400000 93800000 33600000 ...
 # Give the first column (`Country Name`) the shorter name "Country" so it
 # can be referenced without backticks, then confirm the new column names.
colnames(tour_tidy)[1] <- "Country"
colnames(tour_tidy)
[1] "Country"               "Year"                  "Intl_Tourism_Rcpt_USD"

PLOTTING DATA W GGPLOT2, AS FUNCTION

Make a geom_line() chart comparing tourism dollars over time for the selected countries, using scale_y_log10(). Wrap the tour_tidy plot in a function, tourism_fun, so that passing different countries as the function argument updates the chart.

# function name: tourism_fun
# Plot international tourism receipts over time for the given countries.
#
# Arguments:
#   ...  Country names, supplied as separate strings or as a character
#        vector; they are combined with c(...) and matched against the
#        Country column of the global `tour_tidy` table.
#
# Returns:
#   A ggplot object with one coloured line per country and a log10 y axis
#   labelled with comma-formatted values.
tourism_fun <- function(...) {
  tour_tidy %>%
    filter(Country %in% c(...)) %>%
    ggplot(aes(
      x = Year, y = Intl_Tourism_Rcpt_USD,
      col = Country, group = Country
    )) +
    geom_line() +
    # The axis title goes in the scale's `name` argument. Wrapping it in
    # labs() inside the scale call passes a label list, not a title string.
    scale_y_log10(name = "Intl Tourism, Current USD", labels = comma)
}
#str(tourism_fun)
class(tourism_fun)
[1] "function"
tourism_fun
function(...) {
     tour_tidy %>% 
          filter(Country %in% c(...)) %>% 
          ggplot(aes(x = Year, y = Intl_Tourism_Rcpt_USD, col = Country, group = Country)) +
          geom_line() +
          scale_y_log10(labs(y = "Intl Tourism, Current USD"), labels = comma)
}
 # store the countries to plot in a character vector
country_choose <- c("Netherlands", "Moldova", "Luxembourg", "Kyrgyz Republic")
 # draw the chart for those countries
tourism_fun(country_choose)

 # tourism_fun() takes `...` and combines whatever it receives with c(...), so a character vector of any length (or several separate names) can be passed
tourism_countries <- c("Zimbabwe", "Togo", "Brazil", "Peru")
 # draw the chart for the second set of countries
tourism_fun(tourism_countries)

LS0tCnRpdGxlOiAiV0VFSyA4OiBQTE9UVElORyBUT1VSSVNNIFdJVEggR0dQTE9UMiIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCldlZWsgOCAtIGdncGxvdDIKaHR0cHM6Ly9zaXRlcy5nb29nbGUuY29tL3NpdGUvY2l0MTM3ZmFsbDE4L3dlZWstNQoKIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIwoKICAgICAgICAgICAgICAgICAgICAqKkFzc2lnbm1lbnQqKgogICAgICAgICAgICAgICAgICAgIAojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjCgoxKSAgIERvd25sb2FkIHRvdXJpc20gZGF0YWJhc2UgZnJvbSB3b3JsZCBiYW5rCiAgICAgaHR0cDovL2RhdGEud29ybGRiYW5rLm9yZy9pbmRpY2F0b3IvU1QuSU5ULlJDUFQuQ0QKICAgICBodHRwczovL3NpdGVzLmdvb2dsZS5jb20vc2l0ZS9jaXQxMzdmYWxsMTgvd2Vlay01L3RvdXJpc21fMjAxNS5jc3YKMikgICBQbG90IGEgdGltZSBzZXJpZXMgZ3JhcGggb2YgdG91cmlzbSByZXZlbnVlIHZzIHllYXIgZm9yIHRocmVlIGRpZmZlcmVudCBjb3VudHJpZXMKMykgICBDaG9vc2UgY291bnRyaWVzIHdpdGggZGlmZmVyZW50IG1hZ25pdHVkZXMgKGllIFVTIGFuZCBHaGFuYSkKNCkgICBVc2UgYSBsb2cgc2NhbGUgdG8gc2hvdyB0aGUgdHJlbmRzIG9uIHRoZSBzYW1lIGdyYXBoaWMKNSkgICBVc2UgZGlmZmVyZW50IGNvbG9ycyBmb3IgZWFjaCBjb3VudHJ5CgoKIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIwojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjCiMgICAgUEFDS0FHRVMKCmxvYWQgdGlkeXIsIGRwbHlyLCBhbmQgZ2dwbG90MgoKYGBge3IgZWNobz1UUlVFfQoKbGlicmFyeSh0aWR5cikKbGlicmFyeShkcGx5cikKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHNjYWxlcykKCiNyZW1vdmUgc2NpZW50aWZpYyBub3RhdGlvbi9mb3JtYXR0aW5nCm9wdGlvbnMoc2NpcGVuPTk5OSkKCmBgYAoKCiMgICAgUkVBRCBEQVRBIElOVE8gUgoKTG9hZCBkYXRhIGludG8gUiBmcm9tIGxpbmsgcHJvdmlkZWQgaW4gc2xpZGVzLgpDaGVjayBzdHJ1Y3R1cmUuCgpgYGB7ciBlY2hvPVRSVUV9Cgp0b3VyIDwtIHJlYWQuY3N2KCJodHRwczovL3NpdGVzLmdvb2dsZS5jb20vc2l0ZS9jaXQxMzdmYWxsMTgvd2Vlay01L3RvdXJpc21fMjAxNS5jc3YiLCBza2lwID0gNCwgY2hlY2submFtZXM9RkFMU0UpCgpzdHIodG91cikKCmBgYAoKCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMKIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIwojICAgQ0xFQU4gVVAgV0lUSCBUSURZUiAgCgogKiBSZW1vdmUgdW5uZWVkZWQgY29sdW1uczog
IAogICAgICArIGBDb3VudHJ5IENvZGVgLCBgSW5kaWNhdG9yIE5hbWVgIGFuZCBgSW5kaWNhdG9yIENvZGVgICAKICAgICAgKyBgY29sdW1uIDYyYCBiL2MgaXQgZG9lc24ndCBoYXZlIGEgdGl0bGUgYW5kIGNhdXNpbmcgcHJvYmxlbXMgIAogKiAgICBVc2UgYGdhdGhlcigpYCB0byBjaGFuZ2UgZnJvbSB3aWRlIHRvIGxvbmcgIAogKiAgICBSZW1vdmUgbnVsbCB2YWx1ZXMgIAogKiAgICBBcnJhbmdlIC8gc29ydCBkYXRhICAKICogICAgRm9ybWF0IGFzIHRpYmJsZSwgY2hhbmdlcyBgWWVhcmAgdG8gYGZhY3RvcmAKICogICAgUmVuYW1lIGBDb3VudHJ5IE5hbWVgIGFzIGBDb3VudHJ5YCAgCgpgYGB7ciBlY2hvPVRSVUV9Cgp0b3VyX3RpZHkgPC0gdG91ciAlPiUgCiAgICAgc2VsZWN0KC0oMjo0KSwgLTYyKSAlPiUKICAgICBnYXRoZXIoWWVhciwgSW50bF9Ub3VyaXNtX1JjcHRfVVNELCBgMTk2MGA6YDIwMTZgLCBuYS5ybSA9IFRSVUUpICU+JSAKICAgICBhcnJhbmdlKGBDb3VudHJ5IE5hbWVgLCBZZWFyKSAKCnN0cih0b3VyX3RpZHkpICAKCmhlYWQodG91cl90aWR5KSAKdGFpbCh0b3VyX3RpZHkpCgojIGNoYW5nZSB0byB0aWJibGUKdG91cl90aWR5IDwtIHRibF9kZih0b3VyX3RpZHkpCnN0cih0b3VyX3RpZHkpICAKCiAjIGNoYW5nZSBgQ291bnRyeSBOYW1lYCB0byBDb3VudHJ5ICAKbmFtZXModG91cl90aWR5KVsxXSA8LSAiQ291bnRyeSIgIApuYW1lcyh0b3VyX3RpZHkpCgpgYGAgIAoKCgojICAgIFBMT1RUSU5HIERBVEEgVyBHR1BMT1QyLCBBUyBGVU5DVElPTgoKTWFrZSBgZ2VvbV9saW5lKClgIGNoYXJ0IGNvbXBhcmluZyB0b3VyaXNtIGRvbGxhcnMgb3ZlciB0aW1lIG9mIHRoZSB0aHJlZSBjb3VudHJpZXMgd2l0aCBhIGBzY2FsZV95X2xvZzEwKClgLgpNYWtlIGB0b3VyX3RpZHlgIGEgZnVuY3Rpb24gYHRvdXJpc21fZnVuYCB3aGVyZWluIGFzc2lnbmluZyBjb3VudHJpZXMgdG8gdGhlIGZ1bmN0aW9uIGFyZ3VtZW50IHVwZGF0ZXMgdGhlIGNoYXJ0CgoKYGBge3IgZWNobz1UUlVFfQojIGZ1bmN0aW9uIG5hbWU6IHRvdXJpc21fZnVuCgp0b3VyaXNtX2Z1biA8LSBmdW5jdGlvbiguLi4pIHsKICAgICB0b3VyX3RpZHkgJT4lIAogICAgICAgICAgZmlsdGVyKENvdW50cnkgJWluJSBjKC4uLikpICU+JSAKICAgICAgICAgIGdncGxvdChhZXMoeCA9IFllYXIsIHkgPSBJbnRsX1RvdXJpc21fUmNwdF9VU0QsIGNvbCA9IENvdW50cnksIGdyb3VwID0gQ291bnRyeSkpICsKICAgICAgICAgIGdlb21fbGluZSgpICsKICAgICAgICAgIHNjYWxlX3lfbG9nMTAobGFicyh5ID0gIkludGwgVG91cmlzbSwgQ3VycmVudCBVU0QiKSwgbGFiZWxzID0gY29tbWEpCn0KCiNzdHIodG91cmlzbV9mdW4pCmNsYXNzKHRvdXJpc21fZnVuKQp0b3VyaXNtX2Z1bgoKICMgYXNzaWduIGNvdW50cmllcyB0byBvYmplY3QKY291bnRyeV9jaG9vc2UgPC0gYygiTmV0aGVybGFuZHMiLCAiTW9sZG92YSIsICJMdXhlbWJvdXJnIiwgIkt5cmd5eiBSZXB1
YmxpYyIpCgogIyBydW4gZnVuY3Rpb24gd2l0aCBvYmplY3QKdG91cmlzbV9mdW4oY291bnRyeV9jaG9vc2UpCgogIyBhcmd1bWVudCBzZXQgdG8gIi4uLiIgbWVhbnMgb2JqZWN0IGlzIHVuZGVmaW5lZCBhbmQgdGhlcmVmb3JlIGZsZXhpYmxlIAp0b3VyaXNtX2NvdW50cmllcyA8LSBjKCJaaW1iYWJ3ZSIsICJUb2dvIiwgIkJyYXppbCIsICJQZXJ1IikKCiAjIHJ1biBmdW5jdGlvbiB3aXRoIG9iamVjdAp0b3VyaXNtX2Z1bih0b3VyaXNtX2NvdW50cmllcykKCmBgYAo=