Code
library(readr)
getwd()
[1] "C:/Users/jacob/University of Texas at San Antonio/TEAM - Cossman Crew - General"
Code
# Load the raw MHAS 2001-2021 extract from the Downloads folder.
# NOTE(review): setwd() ties this script to one machine; consider building the
# path with file.path() and leaving the working directory alone.
setwd("C:/Users/jacob/Downloads")
mhas0121dummies <- read.csv("MHAS0121 NO LABELS NO NEGATIVE TIMES (2).csv")

# Working copy used by the rest of the script. No rows are dropped here, so
# despite the "noNA" name the data still contain missing values.
mhas0121noNA <- mhas0121dummies
#update.packages()
#Split data by person-month
library(survival)
library(dplyr)
# Build the spell boundaries and event flag needed for the person-month split.
mhas0121noNA <- mhas0121noNA %>%
  mutate(
    startmo = round(ageint * 12),   # ageint (years) -> whole months
    endmo = round(agecensor * 12),  # agecensor (years) -> whole months
    dead = died0121                 # event indicator; NA where died0121 is NA
  )

# Quick look at the first ten rows, including the three new columns.
head(mhas0121noNA, n = 10)
id locsize01 perwght01 died18 died21 mobirth yrbirth female moint yrint
1 1 1 300 NA NA 10 1941 0 8 2001
2 2 1 253 0 0 6 1931 0 7 2001
3 3 1 454 0 0 6 1950 0 7 2001
4 4 1 337 NA NA 10 1936 0 7 2001
5 5 1 295 0 0 3 1944 1 7 2001
6 6 1 253 0 0 5 1948 1 7 2001
7 7 1 490 NA NA 8 1927 0 8 2001
8 8 1 537 0 0 2 1945 0 7 2001
9 9 1 370 1 NA 9 1946 1 7 2001
10 10 1 237 0 NA 1 1948 0 7 2001
schooling died0103 refused0103 lost0103 followup0103 died0312 refused0312
1 0 0 0 0 1 0 0
2 0 0 0 0 1 0 0
3 3 0 0 0 1 0 0
4 0 0 0 0 1 1 0
5 0 0 0 0 1 0 0
6 0 0 0 0 1 0 0
7 6 0 0 0 1 1 0
8 3 0 0 0 1 0 0
9 1 0 0 0 1 0 0
10 3 0 0 0 1 0 0
lost0312 followup0312 died1215 refused1215 lost1215 followup1215 died1518
1 0 1 1 0 0 0 NA
2 0 1 0 0 0 1 0
3 0 1 0 0 0 1 0
4 0 0 NA 0 NA NA NA
5 0 1 0 0 0 1 0
6 0 1 0 0 0 1 0
7 0 0 NA 0 NA NA NA
8 0 1 0 0 0 1 0
9 0 1 0 0 0 1 1
10 0 1 0 0 NA NA 0
refused1518 lost1518 followup1518 died1821 refused1821 lost1821 followup1821
1 0 NA NA NA 0 NA NA
2 0 0 1 0 0 0 1
3 0 0 1 0 0 0 1
4 0 NA NA NA 0 NA NA
5 0 0 1 0 0 0 1
6 0 0 1 0 0 0 1
7 0 NA NA NA 0 NA NA
8 0 0 1 0 0 0 1
9 0 0 0 NA 0 NA NA
10 0 NA NA NA 0 NA NA
died0121 mocensor yrcensor ageint agecensor ageintexact exposure_months
1 1 11 2015 59.83333 74.08334 59.83333 171
2 0 12 2021 70.08334 90.50000 70.08334 245
3 0 12 2021 51.08333 71.50000 51.08333 245
4 1 6 2011 64.75000 74.66666 64.75000 119
5 0 12 2021 57.33333 77.75000 57.33333 245
6 0 12 2021 53.16667 73.58334 53.16667 245
7 1 5 2012 74.00000 84.75000 74.00000 129
8 0 1 2022 56.41667 76.91666 56.41667 246
9 1 2 2015 54.83333 68.41666 54.83333 163
10 NA 11 2012 53.50000 64.83334 53.50000 136
educlevel ageintmo agecensormo educ1 educ2 educ3 educ4 locsize011 locsize012
1 0 718 889 1 0 0 0 1 0
2 0 841 1086 1 0 0 0 1 0
3 15 613 858 0 1 0 0 1 0
4 0 777 896 1 0 0 0 1 0
5 0 688 933 1 0 0 0 1 0
6 0 638 883 1 0 0 0 1 0
7 68 888 1017 0 0 1 0 1 0
8 15 677 923 0 1 0 0 1 0
9 15 658 821 0 1 0 0 1 0
10 15 642 778 0 1 0 0 1 0
locsize013 locsize014 startmo endmo dead
1 0 0 718 889 1
2 0 0 841 1086 0
3 0 0 613 858 0
4 0 0 777 896 1
5 0 0 688 933 0
6 0 0 638 883 0
7 0 0 888 1017 1
8 0 0 677 923 0
9 0 0 658 821 1
10 0 0 642 778 NA
Code
# A plain max() returned NA here (previous output: [1] NA), so the column has
# missing values; na.rm = TRUE reports the largest observed exposure instead.
max(mhas0121noNA$exposure_months, na.rm = TRUE)
Code
# Possible max exposure time is 296 months, but the cut points below run on the
# age-in-months scale (startmo/endmo), so a much higher ceiling (2000) is used
# to stay safe if things change.
#rm(mhas_personmonth)
# Code adapted from Mills (2011): section 3.5
# Expands the data into person-months: each spell from startmo to endmo is
# split at every whole month, with `dead` set to 1 only in the final interval
# of a person who died.
mhas_personmonth <- survSplit(
  mhas0121noNA,
  cut = 1:2000,
  start = "startmo",
  end = "endmo",
  event = "dead"
)

# Sorting data by ID, then chronologically within person. startmo is used as
# the within-person key because ageintmo is constant for a given person and
# therefore cannot order that person's months.
mhas_personmonth <- mhas_personmonth[
  order(mhas_personmonth$id, mhas_personmonth$startmo),
]
# Creating time-varying age variables for different model specifications.
# The new columns are placed right after died0121.
mhas_personmonth <- mhas_personmonth %>%
  mutate(
    age = startmo / 12,  # could truncate if one wanted to, but I don't
    agesq = age^2,       # quadratic term
    agectr = age - 50,   # age centered at 50
    ageln = log(age),    # natural log (same value logb(age) gave)
    .after = "died0121"
  )
library(dplyr)
library(ggplot2)
#install.packages("AMR")
library(AMR)
# Add age5: five-year age bands from the time-varying age, stored as a factor
# with levels in ascending age order.
mhas_personmonth <- mhas_personmonth %>%
  mutate(
    # Create age groups; ages below 50 match no condition and become NA
    age5 = dplyr::case_when(
      age >= 50 & age < 55 ~ "50-54",
      age >= 55 & age < 60 ~ "55-59",
      age >= 60 & age < 65 ~ "60-64",
      age >= 65 & age < 70 ~ "65-69",
      age >= 70 & age < 75 ~ "70-74",
      age >= 75 & age < 80 ~ "75-79",
      age >= 80 & age < 85 ~ "80-84",
      age >= 85 ~ "85+"
    ),
    # Convert to factor (argument spelled out; `level` only worked through
    # partial matching)
    age5 = factor(
      age5,
      levels = c(
        "50-54", "55-59", "60-64", "65-69",
        "70-74", "75-79", "80-84", "85+"
      )
    )
  )
# Add age5b: the same five-year age bands built with case_when() and converted
# to a factor. The factor() call reads age5b itself, so this step does not
# depend on (or silently copy) any other age-group column.
mhas_personmonth <- mhas_personmonth %>%
  mutate(
    # Ages below 50 match no condition and become NA
    age5b = case_when(
      age >= 50 & age < 55 ~ "50-54",
      age >= 55 & age < 60 ~ "55-59",
      age >= 60 & age < 65 ~ "60-64",
      age >= 65 & age < 70 ~ "65-69",
      age >= 70 & age < 75 ~ "70-74",
      age >= 75 & age < 80 ~ "75-79",
      age >= 80 & age < 85 ~ "80-84",
      age >= 85 ~ "85+"
    ),
    age5b = factor(
      age5b,
      levels = c(
        "50-54", "55-59", "60-64", "65-69",
        "70-74", "75-79", "80-84", "85+"
      )
    )
  )
#write.csv(mhas_personmonth,"C:\\Users\\eay621\\OneDrive - University of Texas at San Antonio\\3. TEACHING\\EVENT HISTORY ANALYSIS\\EHA Fall 2024\\4. Problem sets\\MHAS0121_PERSONMONTHS.csv", row.names = TRUE)
# Export to CSV to explore more easily (at least for me)
#testPMdata <- mhas_personmonth[which(mhas_personmonth$id < 20, ]
#testPMdata <- subset(mhas_personmonth, id < 50, select=c(id,ageint,agecensor,startmo,endmo,age,age5,died0121,dead))
#write.csv(testPMdata,"C:\\Users\\eay621\\OneDrive - University of Texas at San Antonio\\3. TEACHING\\EVENT HISTORY ANALYSIS\\EHA Fall 2024\\4. Problem sets\\MHAS0121_PERSONMONTHS_TEST.csv", row.names = TRUE)
# Store locsize01 and female as factors so that modelling functions treat them
# as categorical rather than numeric.
mhas_personmonth$locsize01 <- as.factor(mhas_personmonth$locsize01)
mhas_personmonth$female <- as.factor(mhas_personmonth$female)