1

library(tidyr)

# Read the comma-separated data; the first line of the file holds the column
# names.
# NOTE(review): this is an absolute, user-specific path, so the script only
# runs on a machine where the file exists at exactly this location.
dta <- read.csv("/Users/Hsin/Documents/nlsy86long.csv", header = TRUE)

# Stack columns 8 and 9 into a key column (test_var, holding the original
# column name) and a value column (test_score), then show the first rows.
# gather() is superseded by pivot_longer(); it is kept here on purpose because
# pivot_longer() orders the stacked rows differently, so head() would no
# longer match the output recorded below.
dta %>%
  gather(key = "test_var", value = "test_score", 8:9) %>%
  head()
##     id    sex     race time grade year month test_var test_score
## 1 2390 Female Majority    1     0    6    67     math  14.285714
## 2 2560 Female Majority    1     0    6    66     math  20.238095
## 3 3740 Female Majority    1     0    6    67     math  17.857143
## 4 4020   Male Majority    1     0    5    60     math   7.142857
## 5 6350   Male Majority    1     1    7    78     math  29.761905
## 6 7030   Male Majority    1     0    5    62     math  14.285714

2

library(car)

# Preview the first six rows of the Vocab data set.
head(Vocab, n = 6L)
##          year    sex education vocabulary
## 20040001 2004 Female         9          3
## 20040002 2004 Female        14          6
## 20040003 2004   Male        14          9
## 20040005 2004 Female        17          8
## 20040008 2004   Male        14          1
## 20040010 2004   Male        14          7
library(ggplot2)

# Scatter plot of vocabulary against education, coloured by sex, with a
# linear-model smoother per colour group, drawn in one panel per year.
ggplot(
  data = Vocab,
  mapping = aes(x = education, y = vocabulary, colour = sex)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(facets = ~ year)

3

# Read the whitespace-delimited probe data (the first line holds the column
# names) and preview the first rows.
# NOTE(review): absolute, user-specific path; adjust before running elsewhere.
dta3 <- read.table("/Users/Hsin/Documents/probeL.txt", header = TRUE)
head(dta3)
##    ID Response_Time Position
## 1 S01            51        1
## 2 S01            36        2
## 3 S01            50        3
## 4 S01            35        4
## 5 S01            42        5
## 6 S02            27        1
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Prefix every position with "Pos_" so that the wide-format columns created
# below get readable names (Pos_1, Pos_2, ...).
dta3 <- mutate(dta3, Position = paste("Pos", Position, sep = "_"))

# Long -> wide: one row per ID, one column per position, cells filled with the
# response time. pivot_wider() replaces the superseded spread(); the result is
# converted with as.data.frame() so it prints as a plain data frame.
# NOTE(review): pivot_wider() keeps rows/columns in order of first appearance,
# whereas spread() sorts them. This assumes the input is already ordered by ID
# and position, as the preview of dta3 suggests -- confirm for the full file.
new_dta3 <- as.data.frame(
  pivot_wider(dta3, names_from = Position, values_from = Response_Time)
)
new_dta3
##     ID Pos_1 Pos_2 Pos_3 Pos_4 Pos_5
## 1  S01    51    36    50    35    42
## 2  S02    27    20    26    17    27
## 3  S03    37    22    41    37    30
## 4  S04    42    36    32    34    27
## 5  S05    27    18    33    14    29
## 6  S06    43    32    43    35    40
## 7  S07    41    22    36    25    38
## 8  S08    38    21    31    20    16
## 9  S09    36    23    27    25    28
## 10 S10    26    31    31    32    36
## 11 S11    29    20    25    26    25

4

# Read the two Nobel tables (first line of each file holds the column names).
# Per the recorded output, dta_c has columns Country/Year and dta_w has
# Name/Gender/Year, so "Year" is the only shared column. It is passed
# explicitly as the key to every merge/join below instead of relying on
# implicit common-column matching.
# NOTE(review): absolute, user-specific paths; adjust before running elsewhere.
dta_c <- read.table(
  "/Users/Hsin/Documents/nobel_countries.txt",
  header = TRUE
)
dta_w <- read.table(
  "/Users/Hsin/Documents/nobel_winners.txt",
  header = TRUE
)

# Base R inner merge: only years present in both tables.
merge(dta_c, dta_w, by = "Year")
##   Year Country              Name Gender
## 1 1950      UK Bertrand  Russell   Male
## 2 2012   China            Mo Yan   Male
## 3 2013  Canada      Alice  Munro Female
## 4 2014  France   Patrick Modiano   Male
## 5 2016      US        Bob  Dylan   Male
## 6 2017      UK    Kazuo Ishiguro   Male

# Base R full outer merge: every year from either table, NA where unmatched.
merge(dta_c, dta_w, by = "Year", all = TRUE)
##   Year Country              Name Gender
## 1 1938    <NA>        Pearl Buck Female
## 2 1950      UK Bertrand  Russell   Male
## 3 2011  Sweden              <NA>   <NA>
## 4 2012   China            Mo Yan   Male
## 5 2013  Canada      Alice  Munro Female
## 6 2014  France   Patrick Modiano   Male
## 7 2015  Russia              <NA>   <NA>
## 8 2016      US        Bob  Dylan   Male
## 9 2017      UK    Kazuo Ishiguro   Male

# dplyr inner join: winners that have a matching country row.
inner_join(dta_w, dta_c, by = "Year")
##                Name Gender Year Country
## 1   Patrick Modiano   Male 2014  France
## 2 Bertrand  Russell   Male 1950      UK
## 3    Kazuo Ishiguro   Male 2017      UK
## 4        Bob  Dylan   Male 2016      US
## 5      Alice  Munro Female 2013  Canada
## 6            Mo Yan   Male 2012   China

# Semi join: country rows that have a match in dta_w (no columns added).
semi_join(dta_c, dta_w, by = "Year")
##   Country Year
## 1  France 2014
## 2      UK 1950
## 3      UK 2017
## 4      US 2016
## 5  Canada 2013
## 6   China 2012

# Left join: all country rows, winner columns filled with NA where unmatched.
left_join(dta_c, dta_w, by = "Year")
##   Country Year              Name Gender
## 1  France 2014   Patrick Modiano   Male
## 2      UK 1950 Bertrand  Russell   Male
## 3      UK 2017    Kazuo Ishiguro   Male
## 4      US 2016        Bob  Dylan   Male
## 5  Canada 2013      Alice  Munro Female
## 6   China 2012            Mo Yan   Male
## 7  Russia 2015              <NA>   <NA>
## 8  Sweden 2011              <NA>   <NA>

# Anti join: country rows without any match in dta_w.
anti_join(dta_c, dta_w, by = "Year")
##   Country Year
## 1  Russia 2015
## 2  Sweden 2011

# Full join: every row from both tables, NA where the other side is missing.
full_join(dta_c, dta_w, by = "Year")
##   Country Year              Name Gender
## 1  France 2014   Patrick Modiano   Male
## 2      UK 1950 Bertrand  Russell   Male
## 3      UK 2017    Kazuo Ishiguro   Male
## 4      US 2016        Bob  Dylan   Male
## 5  Canada 2013      Alice  Munro Female
## 6   China 2012            Mo Yan   Male
## 7  Russia 2015              <NA>   <NA>
## 8  Sweden 2011              <NA>   <NA>
## 9    <NA> 1938        Pearl Buck Female