# Read the wide-format data set from the CSV file (first row = column names)
dta <- read.csv(file = "C:/Users/USER/Desktop/nlsy86wide.csv", header = TRUE)

# Show the column names, types and first few values
str(object = dta)
## 'data.frame':    166 obs. of  23 variables:
##  $ id        : int  23901 25601 37401 40201 63501 70301 72001 76101 76801 77001 ...
##  $ sex       : chr  "Female" "Female" "Female" "Male" ...
##  $ race      : chr  "Majority" "Majority" "Majority" "Majority" ...
##  $ grade86   : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ grade88   : int  2 1 2 1 3 2 1 3 2 2 ...
##  $ grade90   : int  3 3 5 2 4 3 3 4 5 4 ...
##  $ grade92   : int  5 6 6 5 6 5 5 6 6 5 ...
##  $ age86year : int  6 6 6 5 7 5 6 7 6 6 ...
##  $ age88year : int  8 8 8 8 9 8 8 9 9 8 ...
##  $ age90year : int  10 10 10 9 11 10 10 11 11 10 ...
##  $ age92year : int  12 12 12 12 13 12 12 13 13 12 ...
##  $ age86month: int  67 66 67 60 78 62 66 79 76 67 ...
##  $ age88month: int  96 95 95 91 108 93 94 109 104 94 ...
##  $ age90month: int  119 119 122 112 132 117 118 131 128 117 ...
##  $ age92month: int  142 143 144 139 155 139 140 154 151 139 ...
##  $ math86    : num  14.29 20.24 17.86 7.14 29.76 ...
##  $ math88    : num  15.5 36.9 22.6 21.4 50 ...
##  $ math90    : num  38.1 52.4 53.6 53.6 47.6 ...
##  $ math92    : num  41.7 58.3 58.3 51.2 71.4 ...
##  $ read86    : num  19.05 21.43 21.43 7.14 30.95 ...
##  $ read88    : num  29.8 32.1 45.2 21.4 50 ...
##  $ read90    : num  28.6 45.2 69 50 63.1 ...
##  $ read92    : num  45.2 57.1 78.6 59.5 82.1 ...
# Print the first six rows of the wide data
head(x = dta, n = 6L)
##      id    sex     race grade86 grade88 grade90 grade92 age86year age88year
## 1 23901 Female Majority       0       2       3       5         6         8
## 2 25601 Female Majority       0       1       3       6         6         8
## 3 37401 Female Majority       0       2       5       6         6         8
## 4 40201   Male Majority       0       1       2       5         5         8
## 5 63501   Male Majority       1       3       4       6         7         9
## 6 70301   Male Majority       0       2       3       5         5         8
##   age90year age92year age86month age88month age90month age92month    math86
## 1        10        12         67         96        119        142 14.285714
## 2        10        12         66         95        119        143 20.238095
## 3        10        12         67         95        122        144 17.857143
## 4         9        12         60         91        112        139  7.142857
## 5        11        13         78        108        132        155 29.761905
## 6        10        12         62         93        117        139 14.285714
##     math88   math90   math92    read86   read88   read90   read92
## 1 15.47619 38.09524 41.66667 19.047619 29.76190 28.57143 45.23810
## 2 36.90476 52.38095 58.33333 21.428571 32.14286 45.23810 57.14286
## 3 22.61905 53.57143 58.33333 21.428571 45.23810 69.04762 78.57143
## 4 21.42857 53.57143 51.19048  7.142857 21.42857 50.00000 59.52381
## 5 50.00000 47.61905 71.42857 30.952381 50.00000 63.09524 82.14286
## 6 36.90476 55.95238 63.09524 17.857143 46.42857 64.28571 96.42857
library(tidyr)
# Stack the four grade columns (grade86 ... grade92) into long form:
# `grade` records which column a row came from, `Value2` holds its value.
l2dta <- gather(
  data = dta,
  key = "grade",
  value = "Value2",
  starts_with("grade")
)
head(l2dta)
##      id    sex     race age86year age88year age90year age92year age86month
## 1 23901 Female Majority         6         8        10        12         67
## 2 25601 Female Majority         6         8        10        12         66
## 3 37401 Female Majority         6         8        10        12         67
## 4 40201   Male Majority         5         8         9        12         60
## 5 63501   Male Majority         7         9        11        13         78
## 6 70301   Male Majority         5         8        10        12         62
##   age88month age90month age92month    math86   math88   math90   math92
## 1         96        119        142 14.285714 15.47619 38.09524 41.66667
## 2         95        119        143 20.238095 36.90476 52.38095 58.33333
## 3         95        122        144 17.857143 22.61905 53.57143 58.33333
## 4         91        112        139  7.142857 21.42857 53.57143 51.19048
## 5        108        132        155 29.761905 50.00000 47.61905 71.42857
## 6         93        117        139 14.285714 36.90476 55.95238 63.09524
##      read86   read88   read90   read92   grade Value2
## 1 19.047619 29.76190 28.57143 45.23810 grade86      0
## 2 21.428571 32.14286 45.23810 57.14286 grade86      0
## 3 21.428571 45.23810 69.04762 78.57143 grade86      0
## 4  7.142857 21.42857 50.00000 59.52381 grade86      0
## 5 30.952381 50.00000 63.09524 82.14286 grade86      1
## 6 17.857143 46.42857 64.28571 96.42857 grade86      0
library(tidyr)
# Stack the four age-in-years columns (age86year ... age92year) into long form:
# `year` records which column a row came from, `Value3` holds its value.
l3dta <- gather(
  data = dta,
  key = "year",
  value = "Value3",
  ends_with("year")
)
head(l3dta)
##      id    sex     race grade86 grade88 grade90 grade92 age86month age88month
## 1 23901 Female Majority       0       2       3       5         67         96
## 2 25601 Female Majority       0       1       3       6         66         95
## 3 37401 Female Majority       0       2       5       6         67         95
## 4 40201   Male Majority       0       1       2       5         60         91
## 5 63501   Male Majority       1       3       4       6         78        108
## 6 70301   Male Majority       0       2       3       5         62         93
##   age90month age92month    math86   math88   math90   math92    read86   read88
## 1        119        142 14.285714 15.47619 38.09524 41.66667 19.047619 29.76190
## 2        119        143 20.238095 36.90476 52.38095 58.33333 21.428571 32.14286
## 3        122        144 17.857143 22.61905 53.57143 58.33333 21.428571 45.23810
## 4        112        139  7.142857 21.42857 53.57143 51.19048  7.142857 21.42857
## 5        132        155 29.761905 50.00000 47.61905 71.42857 30.952381 50.00000
## 6        117        139 14.285714 36.90476 55.95238 63.09524 17.857143 46.42857
##     read90   read92      year Value3
## 1 28.57143 45.23810 age86year      6
## 2 45.23810 57.14286 age86year      6
## 3 69.04762 78.57143 age86year      6
## 4 50.00000 59.52381 age86year      5
## 5 63.09524 82.14286 age86year      7
## 6 64.28571 96.42857 age86year      5
library(tidyr)
# Stack the four age-in-months columns (age86month ... age92month) into long
# form: `month` records which column a row came from, `Value4` holds its value.
l4dta <- gather(
  data = dta,
  key = "month",
  value = "Value4",
  ends_with("month")
)
head(l4dta)
##      id    sex     race grade86 grade88 grade90 grade92 age86year age88year
## 1 23901 Female Majority       0       2       3       5         6         8
## 2 25601 Female Majority       0       1       3       6         6         8
## 3 37401 Female Majority       0       2       5       6         6         8
## 4 40201   Male Majority       0       1       2       5         5         8
## 5 63501   Male Majority       1       3       4       6         7         9
## 6 70301   Male Majority       0       2       3       5         5         8
##   age90year age92year    math86   math88   math90   math92    read86   read88
## 1        10        12 14.285714 15.47619 38.09524 41.66667 19.047619 29.76190
## 2        10        12 20.238095 36.90476 52.38095 58.33333 21.428571 32.14286
## 3        10        12 17.857143 22.61905 53.57143 58.33333 21.428571 45.23810
## 4         9        12  7.142857 21.42857 53.57143 51.19048  7.142857 21.42857
## 5        11        13 29.761905 50.00000 47.61905 71.42857 30.952381 50.00000
## 6        10        12 14.285714 36.90476 55.95238 63.09524 17.857143 46.42857
##     read90   read92      month Value4
## 1 28.57143 45.23810 age86month     67
## 2 45.23810 57.14286 age86month     66
## 3 69.04762 78.57143 age86month     67
## 4 50.00000 59.52381 age86month     60
## 5 63.09524 82.14286 age86month     78
## 6 64.28571 96.42857 age86month     62
library(tidyr)
# Stack the four math-score columns (math86 ... math92) into long form:
# `math` records which column a row came from, `Value5` holds the score.
l5dta <- gather(
  data = dta,
  key = "math",
  value = "Value5",
  starts_with("math")
)
head(l5dta)
##      id    sex     race grade86 grade88 grade90 grade92 age86year age88year
## 1 23901 Female Majority       0       2       3       5         6         8
## 2 25601 Female Majority       0       1       3       6         6         8
## 3 37401 Female Majority       0       2       5       6         6         8
## 4 40201   Male Majority       0       1       2       5         5         8
## 5 63501   Male Majority       1       3       4       6         7         9
## 6 70301   Male Majority       0       2       3       5         5         8
##   age90year age92year age86month age88month age90month age92month    read86
## 1        10        12         67         96        119        142 19.047619
## 2        10        12         66         95        119        143 21.428571
## 3        10        12         67         95        122        144 21.428571
## 4         9        12         60         91        112        139  7.142857
## 5        11        13         78        108        132        155 30.952381
## 6        10        12         62         93        117        139 17.857143
##     read88   read90   read92   math    Value5
## 1 29.76190 28.57143 45.23810 math86 14.285714
## 2 32.14286 45.23810 57.14286 math86 20.238095
## 3 45.23810 69.04762 78.57143 math86 17.857143
## 4 21.42857 50.00000 59.52381 math86  7.142857
## 5 50.00000 63.09524 82.14286 math86 29.761905
## 6 46.42857 64.28571 96.42857 math86 14.285714
library(tidyr)
# Stack the four reading-score columns (read86 ... read92) into long form:
# `read` records which column a row came from, `Value6` holds the score.
l6dta <- gather(
  data = dta,
  key = "read",
  value = "Value6",
  starts_with("read")
)
head(l6dta)
##      id    sex     race grade86 grade88 grade90 grade92 age86year age88year
## 1 23901 Female Majority       0       2       3       5         6         8
## 2 25601 Female Majority       0       1       3       6         6         8
## 3 37401 Female Majority       0       2       5       6         6         8
## 4 40201   Male Majority       0       1       2       5         5         8
## 5 63501   Male Majority       1       3       4       6         7         9
## 6 70301   Male Majority       0       2       3       5         5         8
##   age90year age92year age86month age88month age90month age92month    math86
## 1        10        12         67         96        119        142 14.285714
## 2        10        12         66         95        119        143 20.238095
## 3        10        12         67         95        122        144 17.857143
## 4         9        12         60         91        112        139  7.142857
## 5        11        13         78        108        132        155 29.761905
## 6        10        12         62         93        117        139 14.285714
##     math88   math90   math92   read    Value6
## 1 15.47619 38.09524 41.66667 read86 19.047619
## 2 36.90476 52.38095 58.33333 read86 21.428571
## 3 22.61905 53.57143 58.33333 read86 21.428571
## 4 21.42857 53.57143 51.19048 read86  7.142857
## 5 50.00000 47.61905 71.42857 read86 30.952381
## 6 36.90476 55.95238 63.09524 read86 17.857143
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# From the first long table keep the identifying columns plus its own
# key/value pair; from each remaining long table keep only its key/value pair.
newdta <- select(l2dta, id, sex, race, grade, Value2)
newdta2 <- select(l3dta, year, Value3)
newdta3 <- select(l4dta, month, Value4)
newdta4 <- select(l5dta, math, Value5)
newdta5 <- select(l6dta, read, Value6)

# Glue the pieces together column-wise. cbind() pairs rows purely by position,
# so this relies on every l*dta having been gathered from the same `dta` with
# its four columns in the same 86/88/90/92 order.
longdta <- cbind(
  newdta,
  newdta2,
  newdta3,
  newdta4,
  newdta5
)

library(ggplot2)

# Plot reading score against age in months, with one point series and one
# linear fit per `id`, in panels by race (rows) and sex (columns).
#
# The aesthetics use the gathered value columns: Value4 (from the age*month
# columns) and Value6 (from the read* columns). The `month` and `read` columns
# only hold the names of the source columns (e.g. "age86month", "read86"),
# so mapping them would plot labels against labels rather than measurements.
ggplot(data = longdta, aes(x = Value4, y = Value6, group = id)) +
  geom_point(size = rel(.5)) +
  # se = FALSE: draw the fitted lines without confidence bands
  stat_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  facet_grid(race ~ sex) +
  labs(x = "Month", y = "Reading score") +
  theme_bw()

###