# EX01 ----

# Read nlsy86long.csv, then stack columns 8 and 9 into a key/value pair:
# `test_var` holds the name of the source column and `test_score` its value.
# Only the first rows of the reshaped table are printed.
# NOTE(review): columns are selected by position, so this depends on the
# column order of the CSV -- confirm before reusing on another extract.
dta <- read.csv("nlsy86long.csv", header = TRUE)
dta %>%
  gather(key = test_var, value = test_score, 8:9) %>%
  head()
##     id    sex     race time grade year month test_var test_score
## 1 2390 Female Majority    1     0    6    67     math  14.285714
## 2 2560 Female Majority    1     0    6    66     math  20.238095
## 3 3740 Female Majority    1     0    6    67     math  17.857143
## 4 4020   Male Majority    1     0    5    60     math   7.142857
## 5 6350   Male Majority    1     1    7    78     math  29.761905
## 6 7030   Male Majority    1     0    5    62     math  14.285714

# EX02 ----

# Plot mean education and mean vocabulary by gender and year as two series
# on one panel, each with error bars of +/- 2 standard errors.
# NOTE(review): `Vocab` is not defined in this script; presumably it comes
# from an attached data package -- confirm.
dta_02 <- Vocab

# One shared dodge so the points, lines and error bars of each gender get
# the same horizontal offset.
dodge_02 <- position_dodge(0.5)

dta_02 %>%
  rename(Gender = sex) %>%
  group_by(Gender, year) %>%
  summarize(
    edu_m = mean(education, na.rm = TRUE),
    # sd(na.rm = TRUE) ignores missing values, so the standard error must
    # divide by the count of non-missing values rather than n().
    edu_se = sd(education, na.rm = TRUE) / sqrt(sum(!is.na(education))),
    voc_m = mean(vocabulary, na.rm = TRUE),
    voc_se = sd(vocabulary, na.rm = TRUE) / sqrt(sum(!is.na(vocabulary)))
  ) %>%
  ggplot(aes(x = year, y = edu_m, color = Gender)) +
  # Education: filled points joined by a solid line.
  geom_point(position = dodge_02, size = rel(2)) +
  geom_line(aes(group = Gender), position = dodge_02) +
  geom_errorbar(
    aes(ymin = edu_m - 2 * edu_se, ymax = edu_m + 2 * edu_se),
    width = .1, position = dodge_02
  ) +
  # Vocabulary: hollow points joined by a dashed line.
  geom_point(aes(y = voc_m), position = dodge_02, size = rel(2), shape = 1) +
  geom_line(
    aes(y = voc_m, group = Gender),
    position = dodge_02, linetype = "dashed"
  ) +
  geom_errorbar(
    aes(ymin = voc_m - 2 * voc_se, ymax = voc_m + 2 * voc_se),
    width = .1, position = dodge_02
  ) +
  labs(x = "Year", y = "Average Education Year and Vocabulary") +
  theme_bw()
## Warning: package 'bindrcpp' was built under R version 3.4.4

# EX03 ----

# Read probeL.txt and reshape it from long to wide: the Position values are
# prefixed with "Pos_" and become column names, and Response_Time fills the
# cells.
dta_03 <- read.table("probeL.txt", header = TRUE)
dta_03 %>%
  mutate(Position = paste("Pos", Position, sep = "_")) %>%
  spread(key = Position, value = Response_Time)
##     ID Pos_1 Pos_2 Pos_3 Pos_4 Pos_5
## 1  S01    51    36    50    35    42
## 2  S02    27    20    26    17    27
## 3  S03    37    22    41    37    30
## 4  S04    42    36    32    34    27
## 5  S05    27    18    33    14    29
## 6  S06    43    32    43    35    40
## 7  S07    41    22    36    25    38
## 8  S08    38    21    31    20    16
## 9  S09    36    23    27    25    28
## 10 S10    26    31    31    32    36
## 11 S11    29    20    25    26    25

# EX04 ----

# Load the two tables that the join examples below combine.
dta_c <- read.table("nobel_countries.txt", header = TRUE)
dta_w <- read.table("nobel_winners.txt", header = TRUE)
# Inner join: keep only the rows whose Year appears in both dta_c and dta_w.
# The key is named explicitly so the match cannot change silently if the
# tables ever gain another shared column.
inner_join(dta_c, dta_w, by = "Year")
##   Country Year              Name Gender
## 1  France 2014   Patrick Modiano   Male
## 2      UK 1950 Bertrand  Russell   Male
## 3      UK 2017    Kazuo Ishiguro   Male
## 4      US 2016        Bob  Dylan   Male
## 5  Canada 2013      Alice  Munro Female
## 6   China 2012            Mo Yan   Male
# Semi join: keep the rows of dta_c whose Year has a match in dta_w, without
# adding any columns from dta_w.
semi_join(dta_c, dta_w, by = "Year")
##   Country Year
## 1  France 2014
## 2      UK 1950
## 3      UK 2017
## 4      US 2016
## 5  Canada 2013
## 6   China 2012
# Left join: keep every row of dta_c and attach the matching dta_w columns;
# rows of dta_c without a match get NA in those columns.
left_join(dta_c, dta_w, by = "Year")
##   Country Year              Name Gender
## 1  France 2014   Patrick Modiano   Male
## 2      UK 1950 Bertrand  Russell   Male
## 3      UK 2017    Kazuo Ishiguro   Male
## 4      US 2016        Bob  Dylan   Male
## 5  Canada 2013      Alice  Munro Female
## 6   China 2012            Mo Yan   Male
## 7  Russia 2015              <NA>   <NA>
## 8  Sweden 2011              <NA>   <NA>
# Anti join: keep the rows of dta_c whose Year has no match in dta_w.
anti_join(dta_c, dta_w, by = "Year")
##   Country Year
## 1  Russia 2015
## 2  Sweden 2011
# Full join: keep every row from both dta_c and dta_w, filling the columns
# that have no counterpart in the other table with NA.
full_join(dta_c, dta_w, by = "Year")
##   Country Year              Name Gender
## 1  France 2014   Patrick Modiano   Male
## 2      UK 1950 Bertrand  Russell   Male
## 3      UK 2017    Kazuo Ishiguro   Male
## 4      US 2016        Bob  Dylan   Male
## 5  Canada 2013      Alice  Munro Female
## 6   China 2012            Mo Yan   Male
## 7  Russia 2015              <NA>   <NA>
## 8  Sweden 2011              <NA>   <NA>
## 9    <NA> 1938        Pearl Buck Female