Q1 NLSY data

# Read nlsy86long.csv; the first line of the file supplies the column names.
dta1 <- read.csv("nlsy86long.csv", header = TRUE)

# Stack the math and read score columns into a test_var / test_score pair.
# The columns are chosen by name instead of by position (8:9) so the reshape
# keeps working if columns are added or reordered upstream.
dtaL <- dta1 %>%
  gather(key = test_var, value = test_score, math, read) %>%
  arrange(id)

# Show the first 10 rows of the reshaped data.
head(dtaL, 10)
     id  sex     race time grade year month test_var test_score
1  1003 Male Minority    1     0    5    60     math      11.90
2  1003 Male Minority    2     2    8    91     math      33.33
3  1003 Male Minority    3     3   10   116     math      27.38
4  1003 Male Minority    4     5   12   138     math      39.29
5  1003 Male Minority    1     0    5    60     read      10.71
6  1003 Male Minority    2     2    8    91     read      36.90
7  1003 Male Minority    3     3   10   116     read      36.90
8  1003 Male Minority    4     5   12   138     read      45.24
9  1012 Male Minority    1     0    6    75     math      27.38
10 1012 Male Minority    2     2    9   103     math      58.33

Q2 Vocab data

# Vocab is not defined in this section; presumably the carData::Vocab data
# set -- TODO confirm which package provides it.
dta2 <- Vocab

# Scatter of vocabulary against education, one panel per value of year,
# with a separate straight-line (lm) fit for each sex and no confidence band.
ggplot(dta2, aes(x = education, y = vocabulary, group = sex, color = sex)) +
  geom_point(shape = 1) +
  stat_smooth(method = "lm", se = FALSE) +
  facet_wrap(~ factor(year)) +
  labs(x = "education", y = "vocabulary") +
  theme_bw()

Q3 Probe words data

# Read probeL.txt; header = TRUE takes the column names from the first line.
dta3 <- read.table("probeL.txt", header = TRUE)

# Reshape to wide: one row per ID and one Pos_<Position> column holding
# Response_Time.
dtaW <- dta3 %>%
  # A length-one "Pos" is recycled to every row, so the pipe no longer
  # depends on the row count of the global dta3.
  mutate(pre = "Pos") %>%
  unite(Time, pre, Position) %>%
  spread(Time, Response_Time) %>%
  arrange(ID)
head(dtaW)
   ID Pos_1 Pos_2 Pos_3 Pos_4 Pos_5
1 S01    51    36    50    35    42
2 S02    27    20    26    17    27
3 S03    37    22    41    37    30
4 S04    42    36    32    34    27
5 S05    27    18    33    14    29
6 S06    43    32    43    35    40

Q4

Load the data

# Read both Nobel tables; the first line of each file holds the column names.
nobel_c <- read.table("nobel_countries.txt", header = TRUE)
nobel_w <- read.table("nobel_winners.txt", header = TRUE)

inner_join:保留 x 與 y 的所有 column;只保留在 y 中有相對應資料的 x 的 row。

# Inner join on the shared column(s): matched rows only, columns of both.
nobel_c %>% inner_join(nobel_w)
Joining, by = "Year"
  Country Year              Name Gender
1  France 2014   Patrick Modiano   Male
2      UK 1950 Bertrand  Russell   Male
3      UK 2017    Kazuo Ishiguro   Male
4      US 2016        Bob  Dylan   Male
5  Canada 2013      Alice  Munro Female
6   China 2012            Mo Yan   Male

semi_join:只保留 x 的 column;只保留在 y 中有相對應資料的 x 的 row。

# Semi join: rows of nobel_c with a match in nobel_w; nobel_c columns only.
nobel_c %>% semi_join(nobel_w)
Joining, by = "Year"
  Country Year
1  France 2014
2      UK 1950
3      UK 2017
4      US 2016
5  Canada 2013
6   China 2012