# EX01 ----
# Read the NLSY86 data; the first line of the CSV supplies the column names.
# TRUE is spelled out because the shorthand T is an ordinary variable that can
# be reassigned.
dta <- read.csv("nlsy86long.csv", header = TRUE)
# Stack columns 8 and 9 into key/value pairs: test_var holds the former column
# name and test_score holds its value. Then preview the first rows.
# NOTE(review): the columns are selected by position; selecting them by name
# would survive a change in column order -- confirm the names before switching.
dta %>%
  gather(key = test_var, value = test_score, 8:9) %>%
  head()
## id sex race time grade year month test_var test_score
## 1 2390 Female Majority 1 0 6 67 math 14.285714
## 2 2560 Female Majority 1 0 6 66 math 20.238095
## 3 3740 Female Majority 1 0 6 67 math 17.857143
## 4 4020 Male Majority 1 0 5 60 math 7.142857
## 5 6350 Male Majority 1 1 7 78 math 29.761905
## 6 7030 Male Majority 1 0 5 62 math 14.285714
# EX02 ----
# Plot, per year and gender, the mean of education (solid line, filled points)
# and the mean of vocabulary (dashed line, open points), each with error bars
# spanning +/- 2 standard errors.
# NOTE(review): Vocab is not defined in this file; presumably the data set
# shipped with the carData package -- confirm it is attached.
dta_02 <- Vocab
dta_02 %>%
  rename(Gender = sex) %>%
  group_by(Gender, year) %>%
  # The standard errors divide by the number of non-missing values so that the
  # denominator matches the observations sd() actually used (na.rm = TRUE);
  # n() would also count rows whose value is NA.
  summarize(
    edu_m = mean(education, na.rm = TRUE),
    edu_se = sd(education, na.rm = TRUE) / sqrt(sum(!is.na(education))),
    voc_m = mean(vocabulary, na.rm = TRUE),
    voc_se = sd(vocabulary, na.rm = TRUE) / sqrt(sum(!is.na(vocabulary)))
  ) %>%
  ggplot(data = ., aes(x = year, y = edu_m, color = Gender)) +
  # Education layers: these use the plot-level y aesthetic (edu_m).
  geom_point(position = position_dodge(.5), size = rel(2)) +
  geom_line(aes(group = Gender), position = position_dodge(.5)) +
  geom_errorbar(
    aes(ymin = edu_m - 2 * edu_se, ymax = edu_m + 2 * edu_se),
    width = .1,
    position = position_dodge(.5)
  ) +
  # Vocabulary layers: y is overridden per layer; pch = 1 draws open circles.
  geom_point(
    aes(y = voc_m),
    position = position_dodge(.5),
    size = rel(2),
    pch = 1
  ) +
  geom_line(
    aes(y = voc_m, group = Gender),
    position = position_dodge(.5),
    linetype = "dashed"
  ) +
  geom_errorbar(
    aes(ymin = voc_m - 2 * voc_se, ymax = voc_m + 2 * voc_se),
    width = .1,
    position = position_dodge(.5)
  ) +
  labs(x = "Year", y = "Average Education Year and Vocabulary") +
  theme_bw()
## Warning: package 'bindrcpp' was built under R version 3.4.4

# EX03 ----
# Read the probe data; the first line of the file supplies the column names.
# TRUE is spelled out because the shorthand T is an ordinary variable that can
# be reassigned.
dta_03 <- read.table("probeL.txt", header = TRUE)
# Prefix every Position value with "Pos_" so the widened columns get readable
# names, then spread to one column per position holding Response_Time.
dta_03 %>%
  mutate(Position = paste("Pos", Position, sep = "_")) %>%
  spread(key = Position, value = Response_Time)
## ID Pos_1 Pos_2 Pos_3 Pos_4 Pos_5
## 1 S01 51 36 50 35 42
## 2 S02 27 20 26 17 27
## 3 S03 37 22 41 37 30
## 4 S04 42 36 32 34 27
## 5 S05 27 18 33 14 29
## 6 S06 43 32 43 35 40
## 7 S07 41 22 36 25 38
## 8 S08 38 21 31 20 16
## 9 S09 36 23 27 25 28
## 10 S10 26 31 31 32 36
## 11 S11 29 20 25 26 25
# EX04 ----
# Load the two Nobel tables used by the join examples; the first line of each
# file supplies the column names. TRUE is spelled out because the shorthand T
# is an ordinary variable that can be reassigned.
dta_c <- read.table("nobel_countries.txt", header = TRUE)
dta_w <- read.table("nobel_winners.txt", header = TRUE)
# Inner join: keep only the rows that have a match in both dta_c and dta_w,
# with the columns of the two tables combined. No `by` is given, so the join
# key is whatever columns the two tables share.
dta_c %>%
  inner_join(dta_w)
## Joining, by = "Year"
## Country Year Name Gender
## 1 France 2014 Patrick Modiano Male
## 2 UK 1950 Bertrand Russell Male
## 3 UK 2017 Kazuo Ishiguro Male
## 4 US 2016 Bob Dylan Male
## 5 Canada 2013 Alice Munro Female
## 6 China 2012 Mo Yan Male
# Semi join: keep the rows of dta_c that have a match in dta_w, returning only
# the columns dta_c already had.
dta_c %>%
  semi_join(dta_w)
## Joining, by = "Year"
## Country Year
## 1 France 2014
## 2 UK 1950
## 3 UK 2017
## 4 US 2016
## 5 Canada 2013
## 6 China 2012
# Left join: keep every row of dta_c and attach the matching columns from
# dta_w; rows of dta_c without a match get NA in the added columns.
dta_c %>%
  left_join(dta_w)
## Joining, by = "Year"
## Country Year Name Gender
## 1 France 2014 Patrick Modiano Male
## 2 UK 1950 Bertrand Russell Male
## 3 UK 2017 Kazuo Ishiguro Male
## 4 US 2016 Bob Dylan Male
## 5 Canada 2013 Alice Munro Female
## 6 China 2012 Mo Yan Male
## 7 Russia 2015 <NA> <NA>
## 8 Sweden 2011 <NA> <NA>
# Anti join: list the rows of dta_c that have no match in dta_w (only the
# columns of dta_c are returned).
dta_c %>%
  anti_join(dta_w)
## Joining, by = "Year"
## Country Year
## 1 Russia 2015
## 2 Sweden 2011
# Full join: keep every row from both dta_c and dta_w, filling the columns
# that have no counterpart in the other table with NA.
dta_c %>%
  full_join(dta_w)
## Joining, by = "Year"
## Country Year Name Gender
## 1 France 2014 Patrick Modiano Male
## 2 UK 1950 Bertrand Russell Male
## 3 UK 2017 Kazuo Ishiguro Male
## 4 US 2016 Bob Dylan Male
## 5 Canada 2013 Alice Munro Female
## 6 China 2012 Mo Yan Male
## 7 Russia 2015 <NA> <NA>
## 8 Sweden 2011 <NA> <NA>
## 9 <NA> 1938 Pearl Buck Female