title: "Homework #3 NLSY 86 example reproduction"
author: "Szu-Yu Chen"
date: "21 September 2020"
# Read the wide-format NLSY 86 file.
dta <- read.csv("C:/Users/ASUS/Desktop/data/nlsy86wide.csv")
library(tidyr)
# Stack the grade86..grade92 columns into a (grade, VALUE_G) pair and keep
# only the identifying columns next to that pair.
dta_1 <- gather(
  dta,
  key = "grade", value = "VALUE_G",
  grade86:grade92
)[, c("id", "sex", "race", "grade", "VALUE_G")]
head(dta_1)
## id sex race grade VALUE_G
## 1 23901 Female Majority grade86 0
## 2 25601 Female Majority grade86 0
## 3 37401 Female Majority grade86 0
## 4 40201 Male Majority grade86 0
## 5 63501 Male Majority grade86 1
## 6 70301 Male Majority grade86 0
# Stack the age86year..age92year columns into a (year, VALUE_Y) pair and keep
# only the identifying columns next to that pair.
dta_2 <- gather(
  dta,
  key = "year", value = "VALUE_Y",
  age86year:age92year
)[, c("id", "sex", "race", "year", "VALUE_Y")]
head(dta_2)
## id sex race year VALUE_Y
## 1 23901 Female Majority age86year 6
## 2 25601 Female Majority age86year 6
## 3 37401 Female Majority age86year 6
## 4 40201 Male Majority age86year 5
## 5 63501 Male Majority age86year 7
## 6 70301 Male Majority age86year 5
# Stack the age86month..age92month columns into a (month, VALUE_M) pair and
# keep only the identifying columns next to that pair.
dta_3 <- gather(
  dta,
  key = "month", value = "VALUE_M",
  age86month:age92month
)[, c("id", "sex", "race", "month", "VALUE_M")]
head(dta_3)
## id sex race month VALUE_M
## 1 23901 Female Majority age86month 67
## 2 25601 Female Majority age86month 66
## 3 37401 Female Majority age86month 67
## 4 40201 Male Majority age86month 60
## 5 63501 Male Majority age86month 78
## 6 70301 Male Majority age86month 62
# Stack the math86..math92 columns into a (math, VALUE_MA) pair and keep only
# the identifying columns next to that pair.
dta_4 <- gather(
  dta,
  key = "math", value = "VALUE_MA",
  math86:math92
)[, c("id", "sex", "race", "math", "VALUE_MA")]
head(dta_4)
## id sex race math VALUE_MA
## 1 23901 Female Majority math86 14.285714
## 2 25601 Female Majority math86 20.238095
## 3 37401 Female Majority math86 17.857143
## 4 40201 Male Majority math86 7.142857
## 5 63501 Male Majority math86 29.761905
## 6 70301 Male Majority math86 14.285714
# Stack the read86..read92 columns into a (read, VALUE_R) pair and keep only
# the identifying columns next to that pair.
dat_5 <- gather(
  dta,
  key = "read", value = "VALUE_R",
  read86:read92
)[, c("id", "sex", "race", "read", "VALUE_R")]
head(dat_5)
## id sex race read VALUE_R
## 1 23901 Female Majority read86 19.047619
## 2 25601 Female Majority read86 21.428571
## 3 37401 Female Majority read86 21.428571
## 4 40201 Male Majority read86 7.142857
## 5 63501 Male Majority read86 30.952381
## 6 70301 Male Majority read86 17.857143
# Assemble the five stacked tables side by side into one data frame.
#
# cbind() pastes columns together purely by row position; it has no `by`
# argument (that belongs to merge()). Passing by = "id" therefore only added a
# constant column named `by`, and binding `id` from every table produced five
# identically named `id` columns. Here `id` is taken once, from dta_1.
#
# Because the binding is positional, first make sure every table lists the
# same ids in the same order; otherwise values would be paired with the wrong
# child.
stopifnot(
  identical(dta_1$id, dta_2$id),
  identical(dta_1$id, dta_3$id),
  identical(dta_1$id, dta_4$id),
  identical(dta_1$id, dat_5$id)
)
all_dta <- cbind(
  dta_1[, c("id", "sex", "race", "grade", "VALUE_G")],
  dta_2[, c("year", "VALUE_Y")],
  dta_3[, c("month", "VALUE_M")],
  dta_4[, c("math", "VALUE_MA")],
  dat_5[, c("read", "VALUE_R")]
)
head(all_dta)
## id sex race grade VALUE_G year VALUE_Y month VALUE_M
## 1 23901 Female Majority grade86 0 age86year 6 age86month 67
## 2 25601 Female Majority grade86 0 age86year 6 age86month 66
## 3 37401 Female Majority grade86 0 age86year 6 age86month 67
## 4 40201 Male Majority grade86 0 age86year 5 age86month 60
## 5 63501 Male Majority grade86 1 age86year 7 age86month 78
## 6 70301 Male Majority grade86 0 age86year 5 age86month 62
## math VALUE_MA read VALUE_R
## 1 math86 14.285714 read86 19.047619
## 2 math86 20.238095 read86 21.428571
## 3 math86 17.857143 read86 21.428571
## 4 math86 7.142857 read86 7.142857
## 5 math86 29.761905 read86 30.952381
## 6 math86 14.285714 read86 17.857143
# Analysis table: the three identifying columns followed by the five stacked
# value columns; every other column of all_dta is left out.
long <- all_dta[
  ,
  c("id", "sex", "race", paste0("VALUE_", c("G", "Y", "M", "MA", "R")))
]
head(long)
## id sex race VALUE_G VALUE_Y VALUE_M VALUE_MA VALUE_R
## 1 23901 Female Majority 0 6 67 14.285714 19.047619
## 2 25601 Female Majority 0 6 66 20.238095 21.428571
## 3 37401 Female Majority 0 6 67 17.857143 21.428571
## 4 40201 Male Majority 0 5 60 7.142857 7.142857
## 5 63501 Male Majority 1 7 78 29.761905 30.952381
## 6 70301 Male Majority 0 5 62 14.285714 17.857143
library(tidyverse)
## -- Attaching packages ------------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.2 √ dplyr 1.0.2
## √ tibble 3.0.3 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.5.0
## √ purrr 0.3.4
## -- Conflicts ---------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Scatter of VALUE_R (axis label "read") against VALUE_M (axis label "month"),
# with one dashed least-squares line per id (group = id), in a grid of panels:
# race down the rows, sex across the columns.
#
# Arguments that merely restated the functions' defaults have been dropped.
# In particular `facets` is a deprecated alias for `rows` in facet_grid();
# NOTE(review): recent ggplot2 releases appear to treat any supplied `facets`
# value, even NULL, as a replacement for `rows`, so it must not be passed.
ggplot(data = long, aes(x = VALUE_M, y = VALUE_R, group = id)) +
  geom_point(size = rel(.5)) +
  stat_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "pink",
    linetype = "dashed",
    size = rel(.1)
  ) +
  facet_grid(
    rows = vars(race),
    cols = vars(sex),
    scales = "free",
    space = "free"
  ) +
  labs(x = "month", y = "read") +
  theme_bw()