# title: "Homework #3 NLSY 86 example reproduction"
# author: "Szu-Yu Chen"
# date: 21 September 2020

# Load tidyr, read the NLSY 86 CSV, then stack the columns grade86 through
# grade92 into a key column (`grade`) and a value column (`VALUE_G`).
library(tidyr)
dta <- read.csv(file = "C:/Users/ASUS/Desktop/data/nlsy86wide.csv")
dta_1 <- gather(
  data = dta,
  key = "grade",
  value = "VALUE_G",
  grade86:grade92
)
# Keep only the identifying columns plus the new key/value pair.
dta_1 <- subset(dta_1, select = c(id, sex, race, grade, VALUE_G))
head(x = dta_1)
##      id    sex     race   grade VALUE_G
## 1 23901 Female Majority grade86       0
## 2 25601 Female Majority grade86       0
## 3 37401 Female Majority grade86       0
## 4 40201   Male Majority grade86       0
## 5 63501   Male Majority grade86       1
## 6 70301   Male Majority grade86       0
# Stack the columns age86year through age92year into `year` (key) and
# `VALUE_Y` (value), keeping only the identifying columns alongside them.
dta_2 <- gather(
  data = dta,
  key = "year",
  value = "VALUE_Y",
  age86year:age92year
)
dta_2 <- subset(dta_2, select = c(id, sex, race, year, VALUE_Y))
head(x = dta_2)
##      id    sex     race      year VALUE_Y
## 1 23901 Female Majority age86year       6
## 2 25601 Female Majority age86year       6
## 3 37401 Female Majority age86year       6
## 4 40201   Male Majority age86year       5
## 5 63501   Male Majority age86year       7
## 6 70301   Male Majority age86year       5
# Stack the columns age86month through age92month into `month` (key) and
# `VALUE_M` (value), keeping only the identifying columns alongside them.
dta_3 <- gather(
  data = dta,
  key = "month",
  value = "VALUE_M",
  age86month:age92month
)
dta_3 <- subset(dta_3, select = c(id, sex, race, month, VALUE_M))
head(x = dta_3)
##      id    sex     race      month VALUE_M
## 1 23901 Female Majority age86month      67
## 2 25601 Female Majority age86month      66
## 3 37401 Female Majority age86month      67
## 4 40201   Male Majority age86month      60
## 5 63501   Male Majority age86month      78
## 6 70301   Male Majority age86month      62
# Stack the columns math86 through math92 into `math` (key) and `VALUE_MA`
# (value), keeping only the identifying columns alongside them.
dta_4 <- gather(
  data = dta,
  key = "math",
  value = "VALUE_MA",
  math86:math92
)
dta_4 <- subset(dta_4, select = c(id, sex, race, math, VALUE_MA))
head(x = dta_4)
##      id    sex     race   math  VALUE_MA
## 1 23901 Female Majority math86 14.285714
## 2 25601 Female Majority math86 20.238095
## 3 37401 Female Majority math86 17.857143
## 4 40201   Male Majority math86  7.142857
## 5 63501   Male Majority math86 29.761905
## 6 70301   Male Majority math86 14.285714
# Stack the columns read86 through read92 into `read` (key) and `VALUE_R`
# (value), keeping only the identifying columns alongside them.
# NOTE(review): this object is named `dat_5`, not `dta_5` like its siblings;
# the name is kept because later code refers to it.
dat_5 <- gather(
  data = dta,
  key = "read",
  value = "VALUE_R",
  read86:read92
)
dat_5 <- subset(dat_5, select = c(id, sex, race, read, VALUE_R))
head(x = dat_5)
##      id    sex     race   read   VALUE_R
## 1 23901 Female Majority read86 19.047619
## 2 25601 Female Majority read86 21.428571
## 3 37401 Female Majority read86 21.428571
## 4 40201   Male Majority read86  7.142857
## 5 63501   Male Majority read86 30.952381
## 6 70301   Male Majority read86 17.857143
# Combine the five stacked tables side by side into one data frame.
#
# cbind() pairs rows purely by position and has no `by` argument: the former
# `by = "id"` was not a join key, it just appended a constant column named
# `by` filled with "id". Because the pairing is positional, first verify that
# every table lists the same ids in the same order.
stopifnot(
  identical(dta_1$id, dta_2$id),
  identical(dta_1$id, dta_3$id),
  identical(dta_1$id, dta_4$id),
  identical(dta_1$id, dat_5$id)
)

# `id`, `sex` and `race` are taken once, from dta_1; repeating `id` from each
# table would produce duplicate column names.
all_dta <- cbind(
  dta_1[, c("id", "sex", "race", "grade", "VALUE_G")],
  dta_2[, c("year", "VALUE_Y")],
  dta_3[, c("month", "VALUE_M")],
  dta_4[, c("math", "VALUE_MA")],
  dat_5[, c("read", "VALUE_R")]
)

head(all_dta)
##      id    sex     race   grade VALUE_G      year VALUE_Y      month VALUE_M
## 1 23901 Female Majority grade86       0 age86year       6 age86month      67
## 2 25601 Female Majority grade86       0 age86year       6 age86month      66
## 3 37401 Female Majority grade86       0 age86year       6 age86month      67
## 4 40201   Male Majority grade86       0 age86year       5 age86month      60
## 5 63501   Male Majority grade86       1 age86year       7 age86month      78
## 6 70301   Male Majority grade86       0 age86year       5 age86month      62
##     math  VALUE_MA   read   VALUE_R
## 1 math86 14.285714 read86 19.047619
## 2 math86 20.238095 read86 21.428571
## 3 math86 17.857143 read86 21.428571
## 4 math86  7.142857 read86  7.142857
## 5 math86 29.761905 read86 30.952381
## 6 math86 14.285714 read86 17.857143
# Keep the identifying columns and the five value columns of all_dta; the
# key columns (grade, year, month, math, read) are left out.
long <- all_dta[, c(
  "id", "sex", "race",
  "VALUE_G", "VALUE_Y", "VALUE_M", "VALUE_MA", "VALUE_R"
)]
head(x = long)
##      id    sex     race VALUE_G VALUE_Y VALUE_M  VALUE_MA   VALUE_R
## 1 23901 Female Majority       0       6      67 14.285714 19.047619
## 2 25601 Female Majority       0       6      66 20.238095 21.428571
## 3 37401 Female Majority       0       6      67 17.857143 21.428571
## 4 40201   Male Majority       0       5      60  7.142857  7.142857
## 5 63501   Male Majority       1       7      78 29.761905 30.952381
## 6 70301   Male Majority       0       5      62 14.285714 17.857143
library(tidyverse)
## -- Attaching packages ------------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.2     √ dplyr   1.0.2
## √ tibble  3.0.3     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0
## √ purrr   0.3.4
## -- Conflicts ---------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Scatter plot of VALUE_R (y, labelled "read") against VALUE_M (x, labelled
# "month"), with observations grouped by id and a dashed pink least-squares
# line fitted per group. Panels: race in rows, sex in columns, with free
# scales and free panel sizes.
#
# Arguments that only restated ggplot2 defaults have been dropped. In
# particular `facets = NULL` is gone: `facets` is the deprecated name for
# `rows`, and supplying it (even as NULL) can override `rows` in newer
# ggplot2 releases -- TODO confirm against the installed version.
# Plain numerics replace rel(), which is a helper for theme element sizes.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept for the ggplot2 3.3.2 attached by this script.
ggplot(data = long, aes(x = VALUE_M, y = VALUE_R, group = id)) +
  geom_point(size = 0.5) +
  stat_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "pink",
    linetype = "dashed",
    size = 0.1
  ) +
  facet_grid(
    rows = vars(race),
    cols = vars(sex),
    scales = "free",
    space = "free"
  ) +
  labs(x = "month", y = "read") +
  theme_bw()