## Load the unfold function from the internet
## http://socserv.mcmaster.ca/jfox/Courses/R-course-Berkeley/unfold.R
source("http://socserv.mcmaster.ca/jfox/Courses/R-course-Berkeley/unfold.R")
##
library(dplyr)
## Rossi, et al. 1980. Money, Work and Crime: Some Experimental Results. New York: Academic Press.
## http://cran.r-project.org/doc/contrib/Fox-Companion/Rossi.txt
RossiWide <- read.table(file="http://cran.r-project.org/doc/contrib/Fox-Companion/Rossi.txt", header=TRUE)
## Create the ID variable
RossiWide$ID <- seq_len(nrow(RossiWide))
## Wide-to-long reformat using unfold()
RossiLong <- unfold(RossiWide, time = "week", event = "arrest", cov = 11:62, cov.names = "employed")
rownames(RossiLong) <- NULL
## Two-table DB like RDBMS
timeInvariant <- c("ID","fin","age","race","wexp","mar","paro","prio","educ")
timeVarying <- c("ID","start","stop","arrest.time","employed")
## Create the time-invariant table
RossiLongInvariant <- RossiLong[timeInvariant]
RossiLongInvariant <- RossiLongInvariant[!duplicated(RossiLongInvariant$ID), ]
rownames(RossiLongInvariant) <- NULL
names(RossiLongInvariant) <- c("ID","FinAid","Age","BlackRace","WorkExper","Married","Parole","nPriorArrest","EduLevel")
## Create the time-varying table
RossiLongVarying <- RossiLong[timeVarying]
## Combine and show
RossiRdbms <- list(timeInvariant = RossiLongInvariant,
timeVarying = RossiLongVarying)
lapply(RossiRdbms[1], head, 5)
## $timeInvariant
## ID FinAid Age BlackRace WorkExper Married Parole nPriorArrest EduLevel
## 1 1 0 27 1 0 0 1 3 3
## 2 2 0 18 1 0 0 1 8 4
## 3 3 0 19 0 1 0 1 13 3
## 4 4 1 23 1 1 1 1 1 5
## 5 5 0 19 0 1 0 1 3 3
lapply(RossiRdbms[2], head, 39)
## $timeVarying
## ID start stop arrest.time employed
## 1 1 0 1 0 0
## 2 1 1 2 0 0
## 3 1 2 3 0 0
## 4 1 3 4 0 0
## 5 1 4 5 0 0
## 6 1 5 6 0 0
## 7 1 6 7 0 0
## 8 1 7 8 0 0
## 9 1 8 9 0 0
## 10 1 9 10 0 0
## 11 1 10 11 0 0
## 12 1 11 12 0 0
## 13 1 12 13 0 0
## 14 1 13 14 0 0
## 15 1 14 15 0 0
## 16 1 15 16 0 0
## 17 1 16 17 0 0
## 18 1 17 18 0 0
## 19 1 18 19 0 0
## 20 1 19 20 1 0
## 21 2 0 1 0 0
## 22 2 1 2 0 0
## 23 2 2 3 0 0
## 24 2 3 4 0 0
## 25 2 4 5 0 0
## 26 2 5 6 0 0
## 27 2 6 7 0 0
## 28 2 7 8 0 0
## 29 2 8 9 0 0
## 30 2 9 10 0 1
## 31 2 10 11 0 1
## 32 2 11 12 0 1
## 33 2 12 13 0 1
## 34 2 13 14 0 1
## 35 2 14 15 0 0
## 36 2 15 16 0 0
## 37 2 16 17 1 0
## 38 3 0 1 0 0
## 39 3 1 2 0 0
## Joining
RossiLongNew <- merge(x = RossiRdbms[[1]],
y = RossiRdbms[[2]],
by = "ID",
all.x = FALSE, all.y = FALSE
)
head(RossiLongNew, 39)
## ID FinAid Age BlackRace WorkExper Married Parole nPriorArrest EduLevel start stop arrest.time employed
## 1 1 0 27 1 0 0 1 3 3 0 1 0 0
## 2 1 0 27 1 0 0 1 3 3 1 2 0 0
## 3 1 0 27 1 0 0 1 3 3 2 3 0 0
## 4 1 0 27 1 0 0 1 3 3 3 4 0 0
## 5 1 0 27 1 0 0 1 3 3 4 5 0 0
## 6 1 0 27 1 0 0 1 3 3 5 6 0 0
## 7 1 0 27 1 0 0 1 3 3 6 7 0 0
## 8 1 0 27 1 0 0 1 3 3 7 8 0 0
## 9 1 0 27 1 0 0 1 3 3 8 9 0 0
## 10 1 0 27 1 0 0 1 3 3 9 10 0 0
## 11 1 0 27 1 0 0 1 3 3 10 11 0 0
## 12 1 0 27 1 0 0 1 3 3 11 12 0 0
## 13 1 0 27 1 0 0 1 3 3 12 13 0 0
## 14 1 0 27 1 0 0 1 3 3 13 14 0 0
## 15 1 0 27 1 0 0 1 3 3 14 15 0 0
## 16 1 0 27 1 0 0 1 3 3 15 16 0 0
## 17 1 0 27 1 0 0 1 3 3 16 17 0 0
## 18 1 0 27 1 0 0 1 3 3 17 18 0 0
## 19 1 0 27 1 0 0 1 3 3 18 19 0 0
## 20 1 0 27 1 0 0 1 3 3 19 20 1 0
## 21 2 0 18 1 0 0 1 8 4 0 1 0 0
## 22 2 0 18 1 0 0 1 8 4 1 2 0 0
## 23 2 0 18 1 0 0 1 8 4 2 3 0 0
## 24 2 0 18 1 0 0 1 8 4 3 4 0 0
## 25 2 0 18 1 0 0 1 8 4 4 5 0 0
## 26 2 0 18 1 0 0 1 8 4 5 6 0 0
## 27 2 0 18 1 0 0 1 8 4 6 7 0 0
## 28 2 0 18 1 0 0 1 8 4 7 8 0 0
## 29 2 0 18 1 0 0 1 8 4 8 9 0 0
## 30 2 0 18 1 0 0 1 8 4 9 10 0 1
## 31 2 0 18 1 0 0 1 8 4 10 11 0 1
## 32 2 0 18 1 0 0 1 8 4 11 12 0 1
## 33 2 0 18 1 0 0 1 8 4 12 13 0 1
## 34 2 0 18 1 0 0 1 8 4 13 14 0 1
## 35 2 0 18 1 0 0 1 8 4 14 15 0 0
## 36 2 0 18 1 0 0 1 8 4 15 16 0 0
## 37 2 0 18 1 0 0 1 8 4 16 17 1 0
## 38 3 0 19 0 1 0 1 13 3 0 1 0 0
## 39 3 0 19 0 1 0 1 13 3 1 2 0 0
varsToKeep <- c("ID","FinAid","Age","BlackRace","WorkExper","Married","Parole","nPriorArrest","EduLevel","stop","employed")
## Full
RossiWideNewFull <- reshape(data = RossiLongNew[varsToKeep],
v.names = "employed", # Used in long-to-wide
timevar = "stop",
idvar = "ID",
direction = "wide",
sep = "_"
)
head(RossiWideNewFull)
## ID FinAid Age BlackRace WorkExper Married Parole nPriorArrest EduLevel employed_1 employed_2 employed_3
## 1 1 0 27 1 0 0 1 3 3 0 0 0
## 21 2 0 18 1 0 0 1 8 4 0 0 0
## 38 3 0 19 0 1 0 1 13 3 0 0 0
## 63 4 1 23 1 1 1 1 1 5 0 0 0
## 115 5 0 19 0 1 0 1 3 3 0 0 0
## 167 6 0 24 1 1 0 0 2 4 0 0 0
## employed_4 employed_5 employed_6 employed_7 employed_8 employed_9 employed_10 employed_11 employed_12
## 1 0 0 0 0 0 0 0 0 0
## 21 0 0 0 0 0 0 1 1 1
## 38 0 0 0 0 0 0 0 0 0
## 63 0 1 1 1 1 1 1 1 1
## 115 0 0 0 0 0 0 1 1 1
## 167 0 1 1 1 1 1 1 0 0
## employed_13 employed_14 employed_15 employed_16 employed_17 employed_18 employed_19 employed_20 employed_21
## 1 0 0 0 0 0 0 0 0 NA
## 21 1 1 0 0 0 NA NA NA NA
## 38 0 0 0 0 1 0 0 0 0
## 63 1 1 1 1 1 1 1 1 1
## 115 1 1 1 1 1 1 1 1 1
## 167 0 0 0 0 0 0 0 0 0
## employed_22 employed_23 employed_24 employed_25 employed_26 employed_27 employed_28 employed_29 employed_30
## 1 NA NA NA NA NA NA NA NA NA
## 21 NA NA NA NA NA NA NA NA NA
## 38 0 0 0 0 NA NA NA NA NA
## 63 0 0 0 0 0 0 0 0 0
## 115 1 1 1 1 1 1 1 1 1
## 167 0 1 1 1 1 1 1 1 0
## employed_31 employed_32 employed_33 employed_34 employed_35 employed_36 employed_37 employed_38 employed_39
## 1 NA NA NA NA NA NA NA NA NA
## 21 NA NA NA NA NA NA NA NA NA
## 38 NA NA NA NA NA NA NA NA NA
## 63 0 1 1 1 1 1 1 1 1
## 115 1 1 1 1 0 0 0 0 0
## 167 0 0 0 0 0 0 0 0 0
## employed_40 employed_41 employed_42 employed_43 employed_44 employed_45 employed_46 employed_47 employed_48
## 1 NA NA NA NA NA NA NA NA NA
## 21 NA NA NA NA NA NA NA NA NA
## 38 NA NA NA NA NA NA NA NA NA
## 63 1 1 1 1 1 1 1 1 1
## 115 0 0 0 0 0 0 0 0 0
## 167 0 0 0 0 0 0 0 0 0
## employed_49 employed_50 employed_51 employed_52
## 1 NA NA NA NA
## 21 NA NA NA NA
## 38 NA NA NA NA
## 63 1 1 1 1
## 115 0 0 0 0
## 167 0 0 0 0
## Summarized
RossiWideNewSumm <- RossiLongNew %>%
dplyr::group_by(ID) %>%
dplyr::mutate(emp_mon1 = sign(sum(employed[stop %in% c(1:4)], na.rm = TRUE)))
RossiWideNewSumm <- RossiWideNewSumm[!duplicated(RossiWideNewSumm$ID), ]
RossiWideNewSumm <- RossiWideNewSumm[c("ID","FinAid","Age","BlackRace","WorkExper","Married","Parole","nPriorArrest","EduLevel","emp_mon1")]
head(as.data.frame(RossiWideNewSumm), 10)
## ID FinAid Age BlackRace WorkExper Married Parole nPriorArrest EduLevel emp_mon1
## 1 1 0 27 1 0 0 1 3 3 0
## 21 2 0 18 1 0 0 1 8 4 0
## 38 3 0 19 0 1 0 1 13 3 0
## 63 4 1 23 1 1 1 1 1 5 0
## 115 5 0 19 0 1 0 1 3 3 0
## 167 6 0 24 1 1 0 0 2 4 0
## 219 7 0 25 1 1 1 1 0 4 1
## 242 8 1 21 1 1 0 1 4 3 0
## 294 9 0 22 1 0 0 0 6 3 0
## 346 10 0 20 1 1 0 0 0 5 1