# Read the wide-format NLSY CSV into a data frame
dta <- read.csv(
  file = "C:/Users/USER/Desktop/nlsy86wide.csv",
  header = TRUE
)
# Show each column's type and leading values
str(object = dta)
## 'data.frame': 166 obs. of 23 variables:
## $ id : int 23901 25601 37401 40201 63501 70301 72001 76101 76801 77001 ...
## $ sex : chr "Female" "Female" "Female" "Male" ...
## $ race : chr "Majority" "Majority" "Majority" "Majority" ...
## $ grade86 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ grade88 : int 2 1 2 1 3 2 1 3 2 2 ...
## $ grade90 : int 3 3 5 2 4 3 3 4 5 4 ...
## $ grade92 : int 5 6 6 5 6 5 5 6 6 5 ...
## $ age86year : int 6 6 6 5 7 5 6 7 6 6 ...
## $ age88year : int 8 8 8 8 9 8 8 9 9 8 ...
## $ age90year : int 10 10 10 9 11 10 10 11 11 10 ...
## $ age92year : int 12 12 12 12 13 12 12 13 13 12 ...
## $ age86month: int 67 66 67 60 78 62 66 79 76 67 ...
## $ age88month: int 96 95 95 91 108 93 94 109 104 94 ...
## $ age90month: int 119 119 122 112 132 117 118 131 128 117 ...
## $ age92month: int 142 143 144 139 155 139 140 154 151 139 ...
## $ math86 : num 14.29 20.24 17.86 7.14 29.76 ...
## $ math88 : num 15.5 36.9 22.6 21.4 50 ...
## $ math90 : num 38.1 52.4 53.6 53.6 47.6 ...
## $ math92 : num 41.7 58.3 58.3 51.2 71.4 ...
## $ read86 : num 19.05 21.43 21.43 7.14 30.95 ...
## $ read88 : num 29.8 32.1 45.2 21.4 50 ...
## $ read90 : num 28.6 45.2 69 50 63.1 ...
## $ read92 : num 45.2 57.1 78.6 59.5 82.1 ...
# Preview the first six rows of the wide data
head(dta, n = 6L)
## id sex race grade86 grade88 grade90 grade92 age86year age88year
## 1 23901 Female Majority 0 2 3 5 6 8
## 2 25601 Female Majority 0 1 3 6 6 8
## 3 37401 Female Majority 0 2 5 6 6 8
## 4 40201 Male Majority 0 1 2 5 5 8
## 5 63501 Male Majority 1 3 4 6 7 9
## 6 70301 Male Majority 0 2 3 5 5 8
## age90year age92year age86month age88month age90month age92month math86
## 1 10 12 67 96 119 142 14.285714
## 2 10 12 66 95 119 143 20.238095
## 3 10 12 67 95 122 144 17.857143
## 4 9 12 60 91 112 139 7.142857
## 5 11 13 78 108 132 155 29.761905
## 6 10 12 62 93 117 139 14.285714
## math88 math90 math92 read86 read88 read90 read92
## 1 15.47619 38.09524 41.66667 19.047619 29.76190 28.57143 45.23810
## 2 36.90476 52.38095 58.33333 21.428571 32.14286 45.23810 57.14286
## 3 22.61905 53.57143 58.33333 21.428571 45.23810 69.04762 78.57143
## 4 21.42857 53.57143 51.19048 7.142857 21.42857 50.00000 59.52381
## 5 50.00000 47.61905 71.42857 30.952381 50.00000 63.09524 82.14286
## 6 36.90476 55.95238 63.09524 17.857143 46.42857 64.28571 96.42857
library(tidyr)
# Stack the four grade columns into key/value form: `grade` records which
# source column a row came from and `Value2` holds the value found there.
# gather() stacks one source column after another; the cbind() further down
# relies on every long table sharing that row order.
l2dta <- gather(
  data = dta,
  key = "grade",
  value = "Value2",
  grade86, grade88, grade90, grade92
)
# Keep only the identifiers and the new key/value pair
l2dta1 <- l2dta[c("id", "sex", "race", "grade", "Value2")]
head(l2dta1, n = 6L)
## id sex race grade Value2
## 1 23901 Female Majority grade86 0
## 2 25601 Female Majority grade86 0
## 3 37401 Female Majority grade86 0
## 4 40201 Male Majority grade86 0
## 5 63501 Male Majority grade86 1
## 6 70301 Male Majority grade86 0
library(tidyr)
# Stack the four age-in-years columns into key/value form: `year` records the
# source column name and `Value3` holds the value found there.
l3dta <- gather(
  data = dta,
  key = "year",
  value = "Value3",
  age86year, age88year, age90year, age92year
)
# Keep only the identifiers and the new key/value pair
l3dta1 <- l3dta[c("id", "sex", "race", "year", "Value3")]
head(l3dta1, n = 6L)
## id sex race year Value3
## 1 23901 Female Majority age86year 6
## 2 25601 Female Majority age86year 6
## 3 37401 Female Majority age86year 6
## 4 40201 Male Majority age86year 5
## 5 63501 Male Majority age86year 7
## 6 70301 Male Majority age86year 5
library(tidyr)
# Stack the four age-in-months columns into key/value form: `month` records
# the source column name and `Value4` holds the value found there.
l4dta <- gather(
  data = dta,
  key = "month",
  value = "Value4",
  age86month, age88month, age90month, age92month
)
# Keep only the identifiers and the new key/value pair
l4dta1 <- l4dta[c("id", "sex", "race", "month", "Value4")]
head(l4dta1, n = 6L)
## id sex race month Value4
## 1 23901 Female Majority age86month 67
## 2 25601 Female Majority age86month 66
## 3 37401 Female Majority age86month 67
## 4 40201 Male Majority age86month 60
## 5 63501 Male Majority age86month 78
## 6 70301 Male Majority age86month 62
library(tidyr)
# Stack the four math-score columns into key/value form: `math` records the
# source column name and `Value5` holds the score found there.
l5dta <- gather(
  data = dta,
  key = "math",
  value = "Value5",
  math86, math88, math90, math92
)
# Keep only the identifiers and the new key/value pair
l5dta1 <- l5dta[c("id", "sex", "race", "math", "Value5")]
head(l5dta1, n = 6L)
## id sex race math Value5
## 1 23901 Female Majority math86 14.285714
## 2 25601 Female Majority math86 20.238095
## 3 37401 Female Majority math86 17.857143
## 4 40201 Male Majority math86 7.142857
## 5 63501 Male Majority math86 29.761905
## 6 70301 Male Majority math86 14.285714
library(tidyr)
# Stack the four reading-score columns into key/value form: `read` records
# the source column name and `Value6` holds the score found there.
l6dta <- gather(
  data = dta,
  key = "read",
  value = "Value6",
  read86, read88, read90, read92
)
# Keep only the identifiers and the new key/value pair
l6dta1 <- l6dta[c("id", "sex", "race", "read", "Value6")]
head(l6dta1, n = 6L)
## id sex race read Value6
## 1 23901 Female Majority read86 19.047619
## 2 25601 Female Majority read86 21.428571
## 3 37401 Female Majority read86 21.428571
## 4 40201 Male Majority read86 7.142857
## 5 63501 Male Majority read86 30.952381
## 6 70301 Male Majority read86 17.857143
# Combine the five long tables side by side.
# cbind() pairs rows purely by position, so first confirm that every table
# lists the ids in the same order as l2dta1.
stopifnot(
  identical(l2dta1$id, l3dta1$id),
  identical(l2dta1$id, l4dta1$id),
  identical(l2dta1$id, l5dta1$id),
  identical(l2dta1$id, l6dta1$id)
)
# cbind() has no `by` argument (that belongs to merge()); supplying one only
# appends a constant column called `by`, so none is passed. `id`, `sex` and
# `race` are taken once, from l2dta1; the other tables contribute only their
# key/value columns, which keeps the column names unique.
longdta <- cbind(
  l2dta1,
  l3dta1[c("year", "Value3")],
  l4dta1[c("month", "Value4")],
  l5dta1[c("math", "Value5")],
  l6dta1[c("read", "Value6")]
)
head(longdta)
## id sex race grade Value2 year Value3 month Value4
## 1 23901 Female Majority grade86 0 age86year 6 age86month 67
## 2 25601 Female Majority grade86 0 age86year 6 age86month 66
## 3 37401 Female Majority grade86 0 age86year 6 age86month 67
## 4 40201 Male Majority grade86 0 age86year 5 age86month 60
## 5 63501 Male Majority grade86 1 age86year 7 age86month 78
## 6 70301 Male Majority grade86 0 age86year 5 age86month 62
## math Value5 read Value6
## 1 math86 14.285714 read86 19.047619
## 2 math86 20.238095 read86 21.428571
## 3 math86 17.857143 read86 21.428571
## 4 math86 7.142857 read86 7.142857
## 5 math86 29.761905 read86 30.952381
## 6 math86 14.285714 read86 17.857143
# Retain the identifiers and the five value columns; the key columns that
# name the source variable are left behind.
longdtafinal <- longdta[
  c("id", "sex", "race", "Value2", "Value3", "Value4", "Value5", "Value6")
]
head(longdtafinal, n = 6L)
## id sex race Value2 Value3 Value4 Value5 Value6
## 1 23901 Female Majority 0 6 67 14.285714 19.047619
## 2 25601 Female Majority 0 6 66 20.238095 21.428571
## 3 37401 Female Majority 0 6 67 17.857143 21.428571
## 4 40201 Male Majority 0 5 60 7.142857 7.142857
## 5 63501 Male Majority 1 7 78 29.761905 30.952381
## 6 70301 Male Majority 0 5 62 14.285714 17.857143
# plot
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.2 √ dplyr 1.0.2
## √ tibble 3.0.3 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.5.0
## √ purrr 0.3.4
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Reading score (Value6) against age in months (Value4): raw points plus one
# least-squares line per id, in panels laid out by race (rows) and sex
# (columns). Arguments that merely repeated ggplot2's defaults, including the
# deprecated `facets =` argument of facet_grid(), are omitted.
ggplot(data = longdtafinal, aes(x = Value4, y = Value6, group = id)) +
  # plain numeric sizes: rel() is a helper for theme element sizes
  geom_point(size = 0.5) +
  stat_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "blue",
    # `size` is the line width here; ggplot2 >= 3.4 names it `linewidth`
    size = 0.1
  ) +
  facet_grid(
    rows = vars(race),
    cols = vars(sex),
    # let each panel have its own axis ranges and a proportional panel size
    scales = "free",
    space = "free"
  ) +
  labs(x = "Month", y = "Reading score") +
  theme_bw()

###