# 1. Reshape the two test-score columns from wide to long ----
library(tidyr)

# Read the source CSV (absolute, machine-specific path).
dta <- read.csv("/Users/Hsin/Documents/nlsy86long.csv", header = TRUE)

# Stack columns 8 and 9 into a key column (`test_var`, holding the original
# column name) and a value column (`test_score`), then preview the first rows.
# `%>%` is available here because tidyr re-exports it.
dta %>%
  gather("test_var", "test_score", 8:9) %>%
  head()
## id sex race time grade year month test_var test_score
## 1 2390 Female Majority 1 0 6 67 math 14.285714
## 2 2560 Female Majority 1 0 6 66 math 20.238095
## 3 3740 Female Majority 1 0 6 67 math 17.857143
## 4 4020 Male Majority 1 0 5 60 math 7.142857
## 5 6350 Male Majority 1 1 7 78 math 29.761905
## 6 7030 Male Majority 1 0 5 62 math 14.285714
# 2. Vocabulary score against education, by sex and year ----
library(car)

# Preview the `Vocab` data set that is available once car is loaded.
head(Vocab)
## year sex education vocabulary
## 20040001 2004 Female 9 3
## 20040002 2004 Female 14 6
## 20040003 2004 Male 14 9
## 20040005 2004 Female 17 8
## 20040008 2004 Male 14 1
## 20040010 2004 Male 14 7
library(ggplot2)

# Scatter plot of vocabulary vs. education, coloured by sex, with a linear
# fit per colour group, drawn in one panel per year.
ggplot(Vocab, aes(x = education, y = vocabulary, color = sex)) +
  geom_point() +
  stat_smooth(method = "lm") +
  facet_wrap(~year)

# 3. Spread response times into one column per position ----
# Read the long-format table (absolute, machine-specific path).
dta3 <- read.table("/Users/Hsin/Documents/probeL.txt", header = TRUE)
head(dta3)
## ID Response_Time Position
## 1 S01 51 1
## 2 S01 36 2
## 3 S01 50 3
## 4 S01 35 4
## 5 S01 42 5
## 6 S02 27 1
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union

# Prefix each position with "Pos_" so the spread columns are named
# Pos_1, Pos_2, ... instead of bare numbers.
dta3 <- mutate(dta3, Position = paste("Pos", Position, sep = "_"))

# One row per ID, one Response_Time column per Position value.
new_dta3 <- spread(dta3, Position, Response_Time)
new_dta3
## ID Pos_1 Pos_2 Pos_3 Pos_4 Pos_5
## 1 S01 51 36 50 35 42
## 2 S02 27 20 26 17 27
## 3 S03 37 22 41 37 30
## 4 S04 42 36 32 34 27
## 5 S05 27 18 33 14 29
## 6 S06 43 32 43 35 40
## 7 S07 41 22 36 25 38
## 8 S08 38 21 31 20 16
## 9 S09 36 23 27 25 28
## 10 S10 26 31 31 32 36
## 11 S11 29 20 25 26 25
# 4. Combine the Nobel countries and winners tables ----
# Both tables are read from absolute, machine-specific paths.
dta_c <- read.table("/Users/Hsin/Documents/nobel_countries.txt", header = TRUE)
dta_w <- read.table("/Users/Hsin/Documents/nobel_winners.txt", header = TRUE)
# Base-R merges on the shared key. The key is named explicitly so the result
# does not silently change if the two tables ever gain another common column.

# Inner merge: only years present in both tables.
merge(dta_c, dta_w, by = "Year")
## Year Country Name Gender
## 1 1950 UK Bertrand Russell Male
## 2 2012 China Mo Yan Male
## 3 2013 Canada Alice Munro Female
## 4 2014 France Patrick Modiano Male
## 5 2016 US Bob Dylan Male
## 6 2017 UK Kazuo Ishiguro Male

# Full outer merge: every year from either table, NA where there is no match.
merge(dta_c, dta_w, by = "Year", all = TRUE)
## Year Country Name Gender
## 1 1938 <NA> Pearl Buck Female
## 2 1950 UK Bertrand Russell Male
## 3 2011 Sweden <NA> <NA>
## 4 2012 China Mo Yan Male
## 5 2013 Canada Alice Munro Female
## 6 2014 France Patrick Modiano Male
## 7 2015 Russia <NA> <NA>
## 8 2016 US Bob Dylan Male
## 9 2017 UK Kazuo Ishiguro Male
# dplyr joins on the shared key. `by = "Year"` is given explicitly, so the
# key is fixed and the "Joining, by" message is no longer emitted.

# Inner join: winners that have a matching country row (winner columns first).
inner_join(dta_w, dta_c, by = "Year")
## Name Gender Year Country
## 1 Patrick Modiano Male 2014 France
## 2 Bertrand Russell Male 1950 UK
## 3 Kazuo Ishiguro Male 2017 UK
## 4 Bob Dylan Male 2016 US
## 5 Alice Munro Female 2013 Canada
## 6 Mo Yan Male 2012 China

# Semi join: country rows that have a matching winner; only country columns.
semi_join(dta_c, dta_w, by = "Year")
## Country Year
## 1 France 2014
## 2 UK 1950
## 3 UK 2017
## 4 US 2016
## 5 Canada 2013
## 6 China 2012

# Left join: every country row, with NA where no winner matches.
left_join(dta_c, dta_w, by = "Year")
## Country Year Name Gender
## 1 France 2014 Patrick Modiano Male
## 2 UK 1950 Bertrand Russell Male
## 3 UK 2017 Kazuo Ishiguro Male
## 4 US 2016 Bob Dylan Male
## 5 Canada 2013 Alice Munro Female
## 6 China 2012 Mo Yan Male
## 7 Russia 2015 <NA> <NA>
## 8 Sweden 2011 <NA> <NA>

# Anti join: country rows that have no matching winner.
anti_join(dta_c, dta_w, by = "Year")
## Country Year
## 1 Russia 2015
## 2 Sweden 2011

# Full join: every row from both tables, with NA where there is no match.
full_join(dta_c, dta_w, by = "Year")
## Country Year Name Gender
## 1 France 2014 Patrick Modiano Male
## 2 UK 1950 Bertrand Russell Male
## 3 UK 2017 Kazuo Ishiguro Male
## 4 US 2016 Bob Dylan Male
## 5 Canada 2013 Alice Munro Female
## 6 China 2012 Mo Yan Male
## 7 Russia 2015 <NA> <NA>
## 8 Sweden 2011 <NA> <NA>
## 9 <NA> 1938 Pearl Buck Female