Reproduce the plot on the NLSY 86 example by completing the R script using the data file in wide format.
The data are drawn from the National Longitudinal Survey of Youth (NLSY). The sample observations are from the 1986, 1988, 1990, and 1992 assessment periods. Children were selected to be in kindergarten, first, and second grade and to be of age 5, 6, or 7 at the first assessment (1986). Both reading and mathematical achievement scores are recorded. The former is a recognition subscore of the Peabody Individual Achievement Test (PIAT). This was scaled as the percentage of 84 items that were answered correctly. The same 84 items were administered at all four time points, providing a consistent scale over time. The data set is a subsample of 166 subjects with complete observations.
Source: Bollen, K.A. & Curran, P.J. (2006). Latent curve models. A structural equation perspective. p.59.
Column 1: Student ID Column 2: Gender, male or female Column 3: Race, minority or majority Column 4: Measurement occasions Column 5: Grade at which measurements were made, Kindergarten = 0, First grade = 1, Second grade = 2 Column 6: Age in years Column 7: Age in months Column 8: Math score Column 9: Reading score
# input data
dta <- read.csv("C:/Users/Ching-Fang Wu/Documents/lmm/nlsy86wide.csv", h=T)
# inspect data structure
str(dta)
'data.frame': 166 obs. of 23 variables:
$ id : int 23901 25601 37401 40201 63501 70301 72001 76101 76801 77001 ...
$ sex : chr "Female" "Female" "Female" "Male" ...
$ race : chr "Majority" "Majority" "Majority" "Majority" ...
$ grade86 : int 0 0 0 0 1 0 0 0 0 0 ...
$ grade88 : int 2 1 2 1 3 2 1 3 2 2 ...
$ grade90 : int 3 3 5 2 4 3 3 4 5 4 ...
$ grade92 : int 5 6 6 5 6 5 5 6 6 5 ...
$ age86year : int 6 6 6 5 7 5 6 7 6 6 ...
$ age88year : int 8 8 8 8 9 8 8 9 9 8 ...
$ age90year : int 10 10 10 9 11 10 10 11 11 10 ...
$ age92year : int 12 12 12 12 13 12 12 13 13 12 ...
$ age86month: int 67 66 67 60 78 62 66 79 76 67 ...
$ age88month: int 96 95 95 91 108 93 94 109 104 94 ...
$ age90month: int 119 119 122 112 132 117 118 131 128 117 ...
$ age92month: int 142 143 144 139 155 139 140 154 151 139 ...
$ math86 : num 14.29 20.24 17.86 7.14 29.76 ...
$ math88 : num 15.5 36.9 22.6 21.4 50 ...
$ math90 : num 38.1 52.4 53.6 53.6 47.6 ...
$ math92 : num 41.7 58.3 58.3 51.2 71.4 ...
$ read86 : num 19.05 21.43 21.43 7.14 30.95 ...
$ read88 : num 29.8 32.1 45.2 21.4 50 ...
$ read90 : num 28.6 45.2 69 50 63.1 ...
$ read92 : num 45.2 57.1 78.6 59.5 82.1 ...
# examine first 6 lines
knitr::kable(head(dta))
id | sex | race | grade86 | grade88 | grade90 | grade92 | age86year | age88year | age90year | age92year | age86month | age88month | age90month | age92month | math86 | math88 | math90 | math92 | read86 | read88 | read90 | read92 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
23901 | Female | Majority | 0 | 2 | 3 | 5 | 6 | 8 | 10 | 12 | 67 | 96 | 119 | 142 | 14.2857 | 15.476 | 38.095 | 41.667 | 19.0476 | 29.762 | 28.571 | 45.238 |
25601 | Female | Majority | 0 | 1 | 3 | 6 | 6 | 8 | 10 | 12 | 66 | 95 | 119 | 143 | 20.2381 | 36.905 | 52.381 | 58.333 | 21.4286 | 32.143 | 45.238 | 57.143 |
37401 | Female | Majority | 0 | 2 | 5 | 6 | 6 | 8 | 10 | 12 | 67 | 95 | 122 | 144 | 17.8571 | 22.619 | 53.571 | 58.333 | 21.4286 | 45.238 | 69.048 | 78.571 |
40201 | Male | Majority | 0 | 1 | 2 | 5 | 5 | 8 | 9 | 12 | 60 | 91 | 112 | 139 | 7.1429 | 21.429 | 53.571 | 51.190 | 7.1429 | 21.429 | 50.000 | 59.524 |
63501 | Male | Majority | 1 | 3 | 4 | 6 | 7 | 9 | 11 | 13 | 78 | 108 | 132 | 155 | 29.7619 | 50.000 | 47.619 | 71.429 | 30.9524 | 50.000 | 63.095 | 82.143 |
70301 | Male | Majority | 0 | 2 | 3 | 5 | 5 | 8 | 10 | 12 | 62 | 93 | 117 | 139 | 14.2857 | 36.905 | 55.952 | 63.095 | 17.8571 | 46.429 | 64.286 | 96.429 |
library(dplyr)
#install.packages("tidyr")
library(tidyr)
# 寬資料轉換成長資料
long_grade <- dta %>% gather (key = grade_y, value = grade, grade86:grade92)
longgrade <- long_grade[,c("id","sex","race","grade_y","grade")]
long_ageyear <- dta %>% gather (key = age_y, value = ageyear, age86year:age92year)
longageyear <- long_ageyear[,c("id","sex","race","age_y","ageyear")]
long_agemonth <- dta %>% gather (key = age_m, value = agemonth, age86month:age92month)
longagemonth <- long_agemonth[,c("id","sex","race","age_m","agemonth")]
long_math <- dta %>% gather (key = math_y, value = math, math86:math92)
longmath <- long_math[,c("id","sex","race","math_y","math")]
long_read <- dta %>% gather (key = read_y, value = read, read86:read92)
longread <- long_read[,c("id","sex","race","read_y","read")]
longdta <- cbind ((longgrade[,c("id","sex","race","grade_y","grade")]),
(longageyear[,c("id","age_y","ageyear")]),
(longagemonth[,c("id","age_m","agemonth")]),
(longmath[,c("id","math_y","math")]),
(longread[,c("id","read_y","read")]),by= "id")
longdtafinal<-longdta[,c("id","sex","race","ageyear","agemonth","math","read")]
head(longdtafinal)
id sex race ageyear agemonth math read
1 23901 Female Majority 6 67 14.2857 19.0476
2 25601 Female Majority 6 66 20.2381 21.4286
3 37401 Female Majority 6 67 17.8571 21.4286
4 40201 Male Majority 5 60 7.1429 7.1429
5 63501 Male Majority 7 78 29.7619 30.9524
6 70301 Male Majority 5 62 14.2857 17.8571
library(tidyverse)
# plot
ggplot(data=longdtafinal, aes(x=agemonth, y=read, group=id)) +
geom_point(size=rel(.5)) +
stat_smooth(mapping = NULL,
data = NULL,
geom = "smooth",
position = "identity",
method ="lm",
formula= y ~ x,
se=F,
fullrange = FALSE,
level = 0.95,
color="skyblue2",
linetype=1,
size=rel(.1)) +
facet_grid(rows = vars(race),
cols = vars(sex),
scales = "free",
space = "free",
shrink = T,
labeller = "label_value",
as.table = T,
switch = NULL,
drop = T,
margins = T, #F就變成原本的四格圖
facets = NULL) +
labs(x="Month", y="Reading score") +
theme_bw()
ggplot(data=longdtafinal, aes(x=agemonth, y=read, group=id)) +
geom_point(size=rel(.5)) +
stat_smooth(mapping = NULL,
data = NULL,
geom = "smooth",
position = "identity",
method ="lm",
formula= y ~ x,
se=F,
fullrange = FALSE,
level = 0.95,
color="skyblue2",
linetype=1,
size=rel(.1)) +
facet_grid(rows = vars(race),
cols = vars(sex),
scales = "free",
space = "free",
shrink = T,
labeller = "label_value",
as.table = T,
switch = NULL,
drop = T,
margins = F,
facets = NULL) +
labs(x="Month", y="Reading score") +
theme_bw()