# Read nlsy86long.csv; the first line of the file holds the column names.
dta1 <- read.csv("nlsy86long.csv", header = TRUE)
# Stack the two score columns (math and read, i.e. columns 8 and 9) into a
# key/value pair: test_var names the test, test_score holds its value.
# Selecting by name keeps the reshape correct if the column order changes.
dtaL <- dta1 %>%
  gather(key = test_var, value = test_score, math, read) %>%
  arrange(id)
# Preview the first 10 rows of the reshaped data.
head(dtaL, 10)
id sex race time grade year month test_var test_score
1 1003 Male Minority 1 0 5 60 math 11.90
2 1003 Male Minority 2 2 8 91 math 33.33
3 1003 Male Minority 3 3 10 116 math 27.38
4 1003 Male Minority 4 5 12 138 math 39.29
5 1003 Male Minority 1 0 5 60 read 10.71
6 1003 Male Minority 2 2 8 91 read 36.90
7 1003 Male Minority 3 3 10 116 read 36.90
8 1003 Male Minority 4 5 12 138 read 45.24
9 1012 Male Minority 1 0 6 75 math 27.38
10 1012 Male Minority 2 2 9 103 math 58.33
# Vocab must already be in scope (it is not created in this part of the file).
dta2 <- Vocab
# Scatter plot of vocabulary against education, one panel per year, with a
# separate linear fit (no confidence band) for each sex.
# Colour is mapped once in the global aesthetic so both layers inherit it.
ggplot(dta2, aes(x = education, y = vocabulary, group = sex, color = sex)) +
  geom_point(shape = 1) +
  stat_smooth(method = "lm", se = FALSE) +
  facet_wrap(~ factor(year)) +
  labs(x = "education", y = "vocabulary") +
  theme_bw()
# Read probeL.txt; the first line of the file holds the column names.
dta3 <- read.table("probeL.txt", header = TRUE)
# Reshape from long to wide: one row per ID, one column per position.
dtaW <- dta3 %>%
  # Constant prefix column; mutate() recycles the scalar to every row.
  mutate(pre = "Pos") %>%
  # Combine prefix and position into column labels such as "Pos_1".
  unite(Time, pre, Position) %>%
  # Spread the response times across the new Pos_* columns.
  spread(Time, Response_Time) %>%
  arrange(ID)
# Preview the first rows of the wide data.
head(dtaW)
ID Pos_1 Pos_2 Pos_3 Pos_4 Pos_5
1 S01 51 36 50 35 42
2 S02 27 20 26 17 27
3 S03 37 22 41 37 30
4 S04 42 36 32 34 27
5 S05 27 18 33 14 29
6 S06 43 32 43 35 40
Load the data.
# Read the two Nobel tables; the first line of each file holds the column names.
nobel_c <- read.table("nobel_countries.txt", header = TRUE)
nobel_w <- read.table("nobel_winners.txt", header = TRUE)
保留x與y的所有column,但只保留在y中有相對應資料的x的row。
# Inner join: keep the rows of nobel_c that have a match in nobel_w, together
# with the columns of both tables. No `by` is given, so the join uses the
# columns the two tables share and reports them in a message.
nobel_c %>%
  inner_join(nobel_w)
Joining, by = "Year"
Country Year Name Gender
1 France 2014 Patrick Modiano Male
2 UK 1950 Bertrand Russell Male
3 UK 2017 Kazuo Ishiguro Male
4 US 2016 Bob Dylan Male
5 Canada 2013 Alice Munro Female
6 China 2012 Mo Yan Male
只保留x的column,且只保留在y中有相對應資料的x的row。
# Semi join: keep the rows of nobel_c that have a match in nobel_w, but only
# the columns of nobel_c. No `by` is given, so the join uses the columns the
# two tables share and reports them in a message.
nobel_c %>%
  semi_join(nobel_w)
Joining, by = "Year"
Country Year
1 France 2014
2 UK 1950
3 UK 2017
4 US 2016
5 Canada 2013
6 China 2012