#load data
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)


# Read the two input tables; header = TRUE takes the column names from the
# first line of each file.
# NOTE(review): both paths are absolute and machine-specific -- consider a
# project-relative path so the script runs elsewhere.
dtac <- read.table(
  file = "/Users/Tjlee/Desktop/weather/nobel_countries.txt",
  header = TRUE
)
dtaw <- read.table(
  file = "/Users/Tjlee/Desktop/weather/nobel_winners.txt",
  header = TRUE
)

# Country table: one row per (Country, Year) pair.
str(dtac)
## 'data.frame':    8 obs. of  2 variables:
##  $ Country: Factor w/ 7 levels "Canada","China",..: 3 6 6 7 1 2 4 5
##  $ Year   : int  2014 1950 2017 2016 2013 2012 2015 2011
# Winner table: one row per laureate with Name, Gender and Year.
str(dtaw)
## 'data.frame':    7 obs. of  3 variables:
##  $ Name  : Factor w/ 7 levels "Alice  Munro",..: 6 2 4 3 1 5 7
##  $ Gender: Factor w/ 2 levels "Female","Male": 2 2 2 2 1 2 1
##  $ Year  : int  2014 1950 2017 2016 2013 2012 1938
# Base-R full outer merge of the two tables on their only shared column,
# Year (lecture note p. 4-5). all = TRUE keeps unmatched rows from both
# sides and fills the missing columns with NA.
dta <- merge(x = dtac, y = dtaw, by = "Year", all = TRUE)
print(dta)
##   Year Country              Name Gender
## 1 1938    <NA>        Pearl Buck Female
## 2 1950      UK Bertrand  Russell   Male
## 3 2011  Sweden              <NA>   <NA>
## 4 2012   China            Mo Yan   Male
## 5 2013  Canada      Alice  Munro Female
## 6 2014  France   Patrick Modiano   Male
## 7 2015  Russia              <NA>   <NA>
## 8 2016      US        Bob  Dylan   Male
## 9 2017      UK    Kazuo Ishiguro   Male
# inner_join: keep only the rows whose Year occurs in both tables
# (lecture note p. 6).
# The key is named explicitly so the join does not depend on dplyr guessing
# the shared column. No sorting is applied: rows stay in dtac order.
dplyr::inner_join(dtac, dtaw, by = "Year")
##   Country Year              Name Gender
## 1  France 2014   Patrick Modiano   Male
## 2      UK 1950 Bertrand  Russell   Male
## 3      UK 2017    Kazuo Ishiguro   Male
## 4      US 2016        Bob  Dylan   Male
## 5  Canada 2013      Alice  Munro Female
## 6   China 2012            Mo Yan   Male
# semi_join: keep the rows of dtac whose Year has a match in dtaw, returning
# only dtac's own columns (lecture note p. 7).
# The key is named explicitly so the join does not depend on dplyr guessing
# the shared column. No sorting is applied: rows stay in dtac order.
dplyr::semi_join(dtac, dtaw, by = "Year")
##   Country Year
## 1  France 2014
## 2      UK 1950
## 3      UK 2017
## 4      US 2016
## 5  Canada 2013
## 6   China 2012
# left_join: keep every row of dtac and attach the matching winner columns;
# years without a winner record get NA (lecture note p. 8).
# The key is named explicitly so the join does not depend on dplyr guessing
# the shared column. No sorting is applied: rows stay in dtac order.
dplyr::left_join(dtac, dtaw, by = "Year")
##   Country Year              Name Gender
## 1  France 2014   Patrick Modiano   Male
## 2      UK 1950 Bertrand  Russell   Male
## 3      UK 2017    Kazuo Ishiguro   Male
## 4      US 2016        Bob  Dylan   Male
## 5  Canada 2013      Alice  Munro Female
## 6   China 2012            Mo Yan   Male
## 7  Russia 2015              <NA>   <NA>
## 8  Sweden 2011              <NA>   <NA>
# anti_join: keep the rows of dtac whose Year has no match in dtaw
# (lecture note p. 9).
# The key is named explicitly so the join does not depend on dplyr guessing
# the shared column. No sorting is applied: rows stay in dtac order.
dplyr::anti_join(dtac, dtaw, by = "Year")
##   Country Year
## 1  Russia 2015
## 2  Sweden 2011
# full_join: keep every row from both tables, filling the columns of
# unmatched rows with NA (lecture note p. 10).
# The key is named explicitly so the join does not depend on dplyr guessing
# the shared column. No sorting is applied: dtac rows come first in their
# original order, followed by the unmatched dtaw rows.
dplyr::full_join(dtac, dtaw, by = "Year")
##   Country Year              Name Gender
## 1  France 2014   Patrick Modiano   Male
## 2      UK 1950 Bertrand  Russell   Male
## 3      UK 2017    Kazuo Ishiguro   Male
## 4      US 2016        Bob  Dylan   Male
## 5  Canada 2013      Alice  Munro Female
## 6   China 2012            Mo Yan   Male
## 7  Russia 2015              <NA>   <NA>
## 8  Sweden 2011              <NA>   <NA>
## 9    <NA> 1938        Pearl Buck Female