Replication of the NLSY97 results in the interest of honesty. Some things had to change (e.g. parental occupational status was used instead of income to measure parental SES); otherwise these are largely the same analyses, with largely the same results, run in a similar dataset collected about 20 years earlier.
# set the working directory in one call; a setwd() inside a notebook chunk only
# lasts for that chunk (use the knitr root.dir option to make it persistent)
setwd('~/Documents/rstuff/psuccess97/predsucc3')
new_data <- read.csv('new_data.csv')
#new_data <- new_data %>% filter(R0173600 < 9)
# rank each father's occupation code (R0008300) by the within-code mean of R0007900,
# then merge the ranking back onto the data as father_rank
rankings <- new_data %>% group_by(R0008300) %>% summarise(status = mean(R0007900, na.rm=T))
temp <- paste0('father', '_rank')
new_data$temp_key <- new_data[['R0008300']]
new_data <- new_data %>%
left_join(rankings, by = c("temp_key" = "R0008300")) %>%
mutate(!!temp := status) %>%
select(-status)
# same procedure for the mother: rank her occupation code (R0006900) by the within-code mean of R0006500
rankings <- new_data %>% group_by(R0006900) %>% summarise(status = mean(R0006500, na.rm=T))
names(rankings)
temp <- paste0('mother', '_rank')
new_data$temp_key <- new_data[['R0006900']]
new_data <- new_data %>%
left_join(rankings, by = c("temp_key" = "R0006900")) %>%
mutate(!!temp := status) %>%
select(-status)
cor.test(new_data$father_rank, new_data$R0006500)
cor.test(new_data$father_rank, new_data$R0007900)
lr <- lm(data=new_data, R0006500 ~ as.factor(R0008300))
summary(lr)
new_data$pses <- getpc(new_data %>% select(father_rank, mother_rank, R0006500, R0007900), dofa=F, normalizeit=T, fillmissing=T)
psych::reliability(new_data %>% select(father_rank, mother_rank, R0006500, R0007900))
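getpc() is a helper from the author's own toolkit and is not defined in this notebook. As a rough guide for readers, here is a minimal sketch of what it is assumed to do given how it is called (first-principal-component scores, optional mean imputation and z-standardization); getpc_sketch and its internals are guesses, not the author's code.
# Hypothetical stand-in for getpc(), not the author's implementation:
# score cases on the first principal component of the supplied columns
getpc_sketch <- function(df, dofa = FALSE, normalizeit = TRUE, fillmissing = FALSE) {
  m <- as.matrix(df)
  if (fillmissing) {
    # naive column-wise mean imputation
    for (j in seq_len(ncol(m))) m[is.na(m[, j]), j] <- mean(m[, j], na.rm = TRUE)
  }
  ok <- stats::complete.cases(m)
  scores <- rep(NA_real_, nrow(m))
  pc <- stats::prcomp(m[ok, , drop = FALSE], center = TRUE, scale. = TRUE)
  scores[ok] <- pc$x[, 1]
  # the sign of a PC is arbitrary; flip so it correlates positively with the row means
  if (cor(scores[ok], rowMeans(m[ok, , drop = FALSE])) < 0) scores <- -scores
  if (normalizeit) scores <- as.numeric(scale(scores))
  scores  # dofa = TRUE would presumably switch to a single-factor solution; ignored here
}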
new_data$age2024 <- 124 - new_data$R0000500 - new_data$R0000300/12 # age in 2024 from two-digit birth year (R0000500) and birth month (R0000300)
new_data$Female <- new_data$R0214800-1 # sex: 1 = male, 2 = female, recoded to 0/1
new_data$race <- as.factor(new_data$R0214700) # race/ethnicity cohort as a factor
############################
column_mappings_asvab <- c(
R0615000 = "GS", R0615100 = "AR", R0615200 = "WK",
R0615300 = "PC", R0615400 = "NO", R0615500 = "CS",
R0615600 = "AS", R0615700 = "MK", R0615800 = "MC",
R0615900 = "EI"
)
names_to_change_asvab <- names(new_data) %in% names(column_mappings_asvab)
names(new_data)[names_to_change_asvab] <- column_mappings_asvab[names(new_data)[names_to_change_asvab]]
print(names(new_data))
min((new_data$R0000500+1900), na.rm=T)
max(1981-(new_data$R0000500+1900), na.rm=T)
new_data$agetemp <- 1981-new_data$R0000500
iqtests <- c('GS', 'AR', 'WK', 'PC', 'NO', 'CS', 'AS', 'MK', 'MC', 'EI')
# residualize each ASVAB subtest on age (spline with 6 df) and restandardize
for(col in iqtests) {
new_data[[col]][!is.na(new_data[[col]])] <- agecorrect(col, agevectorname='age2024', datafr = new_data, normalizeit=T, splinex = 6)
}
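agecorrect() is likewise an external helper. Judging from the call signature (column name, age vector name, data frame, spline df), it presumably residualizes the column on a spline of age and standardizes the residuals; a sketch under that assumption (agecorrect_sketch is hypothetical, not the real function):
agecorrect_sketch <- function(colname, agevectorname, datafr, normalizeit = TRUE, splinex = 6) {
  y   <- datafr[[colname]]
  age <- datafr[[agevectorname]]
  fit <- lm(y ~ splines::ns(age, df = splinex), na.action = na.exclude)
  res <- residuals(fit)                     # NA-padded back to nrow(datafr)
  if (normalizeit) res <- as.numeric(scale(res))
  res[!is.na(y)]                            # matches the subset assignment used above
}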
new_data$g = getpc(new_data %>% select(iqtests), dofa=F, fillmissing=F, normalizeit=T)
new_data$IQ <- new_data$g*15 + 100
new_data$Female
new_data$sex <- NA
new_data$sex[new_data$Female==1] <- 'Female'
new_data$sex[new_data$Female==0] <- 'Male'
sd((new_data %>% filter(Female==1))$IQ, na.rm=T)
sd((new_data %>% filter(Female==0))$IQ, na.rm=T)
15.96032/13.76289
p <- GG_denhist(new_data, var='IQ', group='sex', bins=25) +
theme(
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14))
p
file_name <- paste0("plot_", 'paper9', ".png")
ggsave(filename = file_name, plot = p, dpi=400)
############################
###################
column_mappings <- c(
R0155400 = "inc1979", R0312300 = "inc1980", R0482600 = "inc1981",
R0782100 = "inc1982_1", R0782101 = "inc1982_2", R1024000 = "inc1983_1",
R1024001 = "inc1983_2", R1410700 = "inc1984_1", R1410701 = "inc1984_2",
R1778500 = "inc1985_1", R1778501 = "inc1985_2", R2141600 = "inc1986_1",
R2141601 = "inc1986_2", R2350300 = "inc1987_1", R2350301 = "inc1987_2",
R2722500 = "inc1988_1", R2722501 = "inc1988_2", R2971400 = "inc1989_1",
R2971401 = "inc1989_2", R3279400 = "inc1990_1", R3279401 = "inc1990_2",
R3559000 = "inc1991_1", R3559001 = "inc1991_2", R3897100 = "inc1992_1",
R3897101 = "inc1992_2", R4295100 = "inc1993_1", R4295101 = "inc1993_2",
R4982800 = "inc1994_1", R4982801 = "inc1994_2", R5626200 = "inc1996_1",
R5626201 = "inc1996_2", R6364600 = "inc1998_1", R6364601 = "inc1998_2",
R6909700 = "inc2000_1", R6909701 = "inc2000_2", R7607800 = "inc2002",
R8316300 = "inc2004", T0912400 = "inc2006", T2076700 = "inc2008",
T3045300 = "inc2010", T3977400 = "inc2012", T4915800 = "inc2014",
T5619500 = "inc2016", T8115400 = "inc2018", T8645700 = "inc2020"
)
names_to_change <- names(new_data) %in% names(column_mappings)
names(new_data)[names_to_change] <- column_mappings[names(new_data)[names_to_change]]
print(names(new_data))
##########
column_mappings_occ <- c(
R0338300 = "occ1980_01_1", R0349800 = "occ1980_02_1", R0361300 = "occ1980_03_1",
R0372800 = "occ1980_04_1", R0384300 = "occ1980_05_1", R0546000 = "occ1981_01_1",
R0559100 = "occ1981_02_1", R0572200 = "occ1981_03_1", R0585300 = "occ1981_04_1",
R0598400 = "occ1981_05_1", R0840500 = "occ1982_01_1", R0853600 = "occ1982_02_1",
R0866700 = "occ1982_03_1", R0892900 = "occ1982_05_1", R1087700 = "occ1983_01_1",
R1100900 = "occ1983_02_1", R1114100 = "occ1983_03_1", R1127300 = "occ1983_04_1",
R1140500 = "occ1983_05_1", R1463400 = "occ1984_01_1", R1476500 = "occ1984_02_1",
R1489600 = "occ1984_03_1", R1502700 = "occ1984_04_1", R1515800 = "occ1984_05_1",
R1810200 = "occ1985_01_1", R1822900 = "occ1985_02_1", R1835600 = "occ1985_03_1",
R1848300 = "occ1985_04_1", R1861000 = "occ1985_05_1", R2171900 = "occ1986_01_1",
R2185500 = "occ1986_02_1", R2199100 = "occ1986_03_1", R2212700 = "occ1986_04_1",
R2226300 = "occ1986_05_1", R2376700 = "occ1987_01_1", R2388000 = "occ1987_02_1",
R2399300 = "occ1987_03_1", R2410600 = "occ1987_04_1", R2421900 = "occ1987_05_1",
R2771500 = "occ1988_01_1", R2784400 = "occ1988_02_1", R2797300 = "occ1988_03_1",
R2810200 = "occ1988_04_1", R2823100 = "occ1988_05_1", R3013300 = "occ1989_01_1",
R3026400 = "occ1989_02_1", R3039500 = "occ1989_03_1", R3052600 = "occ1989_04_1",
R3065700 = "occ1989_05_1", R3340700 = "occ1990_01_1", R3354700 = "occ1990_02_1",
R3368700 = "occ1990_03_1", R3382700 = "occ1990_04_1", R3396700 = "occ1990_05_1",
R3605000 = "occ1991_01_1", R3617100 = "occ1991_02_1", R3629200 = "occ1991_03_1",
R3641300 = "occ1991_04_1", R3653400 = "occ1991_05_1", R3955200 = "occ1992_01_1",
R3967400 = "occ1992_02_1", R3979600 = "occ1992_03_1", R3991800 = "occ1992_04_1",
R4004000 = "occ1992_05_1", R4587904 = "occ1994_01_1", R4631902 = "occ1994_02_1",
R4675902 = "occ1994_03_1", R4715202 = "occ1994_04_1", R4749402 = "occ1994_05_1",
R5270600 = "occ1996_01_1", R5310900 = "occ1996_02_1", R5349900 = "occ1996_03_1",
R5387000 = "occ1996_04_1", R5421500 = "occ1996_05_1", R6472600 = "occ1998_01_1",
R6472700 = "occ1998_02_1", R6472800 = "occ1998_03_1", R6472900 = "occ1998_04_1",
R6473000 = "occ1998_05_1", R6591800 = "occ2000_01_1", R6591900 = "occ2000_02_1",
R6592000 = "occ2000_03_1", R6592100 = "occ2000_04_1", R6592200 = "occ2000_05_1"
)
names_to_change_occ <- names(new_data) %in% names(column_mappings_occ)
names(new_data)[names_to_change_occ] <- column_mappings_occ[names(new_data)[names_to_change_occ]]
print(names(new_data))
###############
column_mappings_occ <- c(R7209600 = "occ2002_01_1", R7209700 = "occ2002_02_1", R7209800 = "occ2002_03_1",
R7209900 = "occ2002_04_1", R7210000 = "occ2002_05_1",
R7898000 = "occ2004_01_1", R7898100 = "occ2004_02_1", R7898200 = "occ2004_03_1",
R7898300 = "occ2004_04_1", R7898400 = "occ2004_05_1", T0138400 = "occ2006_01_1",
T0138500 = "occ2006_02_1", T0138600 = "occ2006_03_1", T0138700 = "occ2006_04_1",
T0138800 = "occ2006_05_1", T1298000 = "occ2008_01_1", T1298100 = "occ2008_02_1",
T1298200 = "occ2008_03_1", T1298300 = "occ2008_04_1", T1298400 = "occ2008_05_1",
T2326500 = "occ2010_01_1", T2326600 = "occ2010_02_1", T2326700 = "occ2010_03_1",
T2326800 = "occ2010_04_1", T2326900 = "occ2010_05_1", T3308700 = "occ2012_01_1",
T3308800 = "occ2012_02_1", T3308900 = "occ2012_03_1", T3309000 = "occ2012_04_1",
T3309100 = "occ2012_05_1", T4282800 = "occ2014_01_1", T4282900 = "occ2014_02_1",
T4283000 = "occ2014_03_1", T4283100 = "occ2014_04_1", T4283200 = "occ2014_05_1",
T5256900 = "occ2016_01_1", T5257000 = "occ2016_02_1", T5257100 = "occ2016_03_1",
T5257200 = "occ2016_04_1", T5257300 = "occ2016_05_1", T7818600 = "occ2018_01_1",
T7818700 = "occ2018_02_1", T7818800 = "occ2018_03_1", T7818900 = "occ2018_04_1",
T7819000 = "occ2018_05_1", T8428300 = "occ2020_01_1", T8428400 = "occ2020_02_1",
T8428500 = "occ2020_03_1", T8428600 = "occ2020_04_1", T8428700 = "occ2020_05_1"
)
names_to_change_occ <- names(new_data) %in% names(column_mappings_occ)
names(new_data)[names_to_change_occ] <- column_mappings_occ[names(new_data)[names_to_change_occ]]
print(names(new_data))
####################
column_mappings_deg <- c(
R2509800 = "deg1988", R2909200 = "deg1989", R3111200 = "deg1990",
R3511200 = "deg1991", R3711200 = "deg1992", R4138900 = "deg1993",
R4527600 = "deg1994", R5222900 = "deg1996", R5822800 = "deg1998",
R6541400 = "deg2000", R7104600 = "deg2002", R7811500 = "deg2004",
T0015400 = "deg2006", T1215400 = "deg2008",
T2274100 = 'deg2010', T3214200 = 'deg2012', T4202500 = 'deg2014',
T5177500 = 'deg2016', T7745300 = 'deg2018', T8356200 = 'deg2020'
)
names_to_change_deg <- names(new_data) %in% names(column_mappings_deg)
names(new_data)[names_to_change_deg] <- column_mappings_deg[names(new_data)[names_to_change_deg]]
print(names(new_data))
###############################
######################
column_mappings_nw <- c(
R1790803 = "nw1985", R2153903 = "nw1986", R2362503 = "nw1987",
R2735403 = "nw1988", R2982903 = "nw1989", R3293303 = "nw1990",
R3911003 = "nw1992", R4392303 = "nw1993", R5046603 = "nw1994",
R5728003 = "nw1996", R6426003 = "nw1998", R6940103 = "nw2000",
R8378703 = "nw2004", T2142702 = "nw2008", T4045802 = "nw2012",
T5684500 = "nw2016", T8727900 = "nw2020"
)
names_to_change_nw <- names(new_data) %in% names(column_mappings_nw)
names(new_data)[names_to_change_nw] <- column_mappings_nw[names(new_data)[names_to_change_nw]]
print(names(new_data))
###################
deg_columns <- c(
"deg1988", "deg1989", "deg1990",
"deg1991", "deg1992", "deg1993",
"deg1994", "deg1996", "deg1998",
"deg2000", "deg2002", "deg2004",
"deg2006", "deg2008", "deg2010",
"deg2012", "deg2014", "deg2016",
"deg2018", "deg2020"
)
existing_deg_columns <- deg_columns[deg_columns %in% names(new_data)]
for(col in existing_deg_columns) {
new_data[[col]][new_data[[col]]==4] <- 3
new_data[[col]][new_data[[col]]==5] <- 4
new_data[[col]][new_data[[col]]==6] <- 5
new_data[[col]][new_data[[col]]==7] <- 5
new_data[[col]][new_data[[col]]==8] <- NA
}
new_data$perdeg <- NA
for(col in deg_columns) {
new_data$perdeg[is.na(new_data$perdeg) & !is.na(new_data[[col]])] <- new_data[[col]][is.na(new_data$perdeg) & !is.na(new_data[[col]])]
}
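The loop above simply keeps each person's first non-missing degree report, scanning the survey years in order. For reference, an assumed-equivalent one-liner over the columns actually present, using dplyr::coalesce (not in the original):
# earliest non-missing degree report per respondent, column order = year order
new_data$perdeg <- do.call(dplyr::coalesce, new_data[existing_deg_columns])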
###################
###################
income_columns <- c('inc1988_1', 'inc1989_1', 'inc1990_1', 'inc1991_1', 'inc1992_1', 'inc1993_1', 'inc1994_1', 'inc1996_1', 'inc1998_1', "inc2000_1", "inc2002", "inc2004", "inc2006", "inc2008",
"inc2010", "inc2012", "inc2014", "inc2016", "inc2018", "inc2020")
existing_income_columns <- income_columns[income_columns %in% names(new_data)]
yearly_averages <- sapply(new_data[, existing_income_columns], mean, na.rm = TRUE)
new_data$weighted_mean_income <- apply(new_data[, existing_income_columns], 1, function(row) {
non_missing_indices <- !is.na(row)
total_income <- sum(row[non_missing_indices], na.rm = TRUE)
total_average <- sum(yearly_averages[non_missing_indices], na.rm = TRUE)
weighted_mean <- ifelse(total_average > 0, total_income / total_average, NA)
return(weighted_mean)
})
print(head(new_data$weighted_mean_income))
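The apply() above scales each respondent's summed income by the summed sample-year means for the years they actually reported, so reports from different years are put on a common footing before averaging. A vectorized equivalent (weighted_mean_income2 is just a check column, not in the original):
inc <- as.matrix(new_data[, existing_income_columns])
obs <- !is.na(inc)
num <- rowSums(inc, na.rm = TRUE)                    # sum of observed incomes
den <- as.numeric(obs %*% yearly_averages)           # sum of yearly means over observed years
new_data$weighted_mean_income2 <- ifelse(den > 0, num / den, NA)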
###################
nw_columns <- c(
"nw1988", "nw1989", "nw1990","nw1992", "nw1993", "nw1994","nw1996", "nw1998", "nw2000","nw2004", "nw2008", "nw2012","nw2016", "nw2020"
)
existing_nw_columns <- nw_columns[nw_columns %in% names(new_data)]
yearly_averages <- sapply(new_data[, existing_nw_columns], mean, na.rm = TRUE)
new_data$weighted_mean_nw <- apply(new_data[, existing_nw_columns], 1, function(row) {
non_missing_indices <- !is.na(row)
total_nw <- sum(row[non_missing_indices], na.rm = TRUE)
total_average <- sum(yearly_averages[non_missing_indices], na.rm = TRUE)
weighted_mean <- ifelse(total_average > 0, total_nw / total_average, NA)
return(weighted_mean)
})
print(head(new_data$weighted_mean_nw))
describe2(new_data$weighted_mean_nw)
(0.88+1433)/18.9
new_data$weighted_mean_nw[new_data$weighted_mean_nw < -100] <- NA
describe2(new_data$weighted_mean_nw)
#############
# temporary attainment composite (net worth, income, degree), used only to rank occupation codes
new_data$temp <- getpc(new_data %>% select(weighted_mean_nw, weighted_mean_income, perdeg))
# the 1980-2000 waves (first 84 occ columns) and the 2002-2020 waves (remaining 50)
# use different occupation coding schemes, so they are ranked separately below
job_columns <- grep("occ", names(new_data), value = TRUE)
job_columns <- job_columns[1:84]
rankings <- list()
for (job in job_columns) {
new_data$temp2 <- new_data[[job]]
rankings[[job]] <- new_data %>% group_by(temp2) %>% summarise(status = mean(temp, na.rm=T))
}
average_status_by_temp2 <- list()
for (job_name in names(rankings)) {
average_status_by_temp2[[job_name]] <- aggregate(status ~ temp2, data = rankings[[job_name]], mean)
}
combined_averages <- Reduce(function(x, y) merge(x, y, by = "temp2", all = TRUE), average_status_by_temp2)
combined_averages$sum <- rowMeans(combined_averages[, 2:85], na.rm=T)
pol <- combined_averages %>% select(sum, temp2)
###################
job_columns <- grep("occ", names(new_data), value = TRUE)
job_columns <- job_columns[85:134]
rankings <- list()
for (job in job_columns) {
new_data$temp2 <- new_data[[job]]
rankings[[job]] <- new_data %>% group_by(temp2) %>% summarise(status = mean(temp, na.rm=T))
}
average_status_by_temp2 <- list()
for (job_name in names(rankings)) {
average_status_by_temp2[[job_name]] <- aggregate(status ~ temp2, data = rankings[[job_name]], mean)
}
combined_averages <- Reduce(function(x, y) merge(x, y, by = "temp2", all = TRUE), average_status_by_temp2)
combined_averages$sum <- rowMeans(combined_averages[, 2:51], na.rm=T)
pol2 <- combined_averages %>% select(sum, temp2)
###################
job_columns <- grep("occ", names(new_data), value = TRUE)
job_columns <- job_columns[1:84]
for (jobcol in job_columns) {
temp <- paste0(jobcol, '_rank')
new_data$temp_key <- new_data[[jobcol]]
new_data <- new_data %>%
left_join(pol, by = c("temp_key" = "temp2")) %>%
mutate(!!temp := sum) %>%
select(-sum)
new_data <- select(new_data, -temp_key)
}
job_columns <- grep("occ", names(new_data), value = TRUE)
job_columns <- job_columns[85:134]
for (jobcol in job_columns) {
temp <- paste0(jobcol, '_rank')
new_data$temp_key <- new_data[[jobcol]]
new_data <- new_data %>%
left_join(pol2, by = c("temp_key" = "temp2")) %>%
mutate(!!temp := sum) %>%
select(-sum)
new_data <- select(new_data, -temp_key)
}
years <- seq(1988, 2020, by = 1)
for (year in years) {
rank_cols <- grep(paste0('^occ', year, '.*_rank$'), names(new_data), value = TRUE)
if (length(rank_cols) == 5) {
new_data <- new_data %>%
mutate(!!paste0("highest_rank_job_", year) := pmax(!!!rlang::syms(rank_cols), na.rm = TRUE))
} else {
warning(paste("Expected 5 job rank columns for year", year, "but found", length(rank_cols)))
}
}
new_data <- data.frame(new_data)
occ_columns <- grep(paste0("highest"), names(new_data), value = TRUE)
for(col in occ_columns) {
new_data[[col]] <- new_data[[col]] - min(new_data[[col]], na.rm=T)
}
existing_occ_columns <- occ_columns[occ_columns %in% names(new_data)]
yearly_averages <- sapply(new_data[, existing_occ_columns], mean, na.rm = TRUE)
new_data$weighted_occ <- apply(new_data[, existing_occ_columns], 1, function(row) {
non_missing_indices <- !is.na(row)
total_occ <- sum(row[non_missing_indices], na.rm = TRUE)
total_average <- sum(yearly_averages[non_missing_indices], na.rm = TRUE)
weighted_mean <- ifelse(total_occ > 0, total_occ / total_average, NA)
return(weighted_mean)
})
#######################
########
new_data$weighted_occ[!is.na(new_data$weighted_occ)] <- agecorrect('weighted_occ', agevectorname='age2024', datafr = new_data, normalizeit=T, splinex = 6)
new_data$weighted_mean_income[!is.na(new_data$weighted_mean_income)] <- agecorrect('weighted_mean_income', agevectorname='age2024', datafr = new_data, normalizeit=T, splinex = 6)
new_data$perdeg[!is.na(new_data$perdeg)] <- agecorrect('perdeg', agevectorname='age2024', datafr = new_data, normalizeit=T, splinex = 6)
new_data$weighted_mean_nw[!is.na(new_data$weighted_mean_nw)] <- agecorrect('weighted_mean_nw', agevectorname='age2024', datafr = new_data, normalizeit=T, splinex = 6)
#########################
new_data$lognw <- log(new_data$weighted_mean_nw - min(new_data$weighted_mean_nw, na.rm=T) + 0.1)
new_data$loginc <- log(new_data$weighted_mean_income - min(new_data$weighted_mean_income, na.rm=T) + 0.1)
new_data$logocc <- log(new_data$weighted_occ - min(new_data$weighted_occ, na.rm=T) + 0.1)
new_data$logperdeg <- log(new_data$perdeg - min(new_data$perdeg, na.rm=T) + 1)
GG_scatter(new_data, 'IQ', 'lognw') + geom_smooth()
GG_scatter(new_data, 'IQ', 'loginc') + geom_smooth()
GG_scatter(new_data, 'IQ', 'logocc') + geom_smooth()
GG_scatter(new_data, 'IQ', 'logperdeg') + geom_smooth()
GG_scatter(new_data, 'IQ', 'weighted_mean_nw') + geom_smooth()
GG_scatter(new_data, 'IQ', 'weighted_mean_income') + geom_smooth()
GG_scatter(new_data, 'IQ', 'weighted_occ') + geom_smooth()
GG_scatter(new_data, 'IQ', 'perdeg') + geom_smooth()
new_data$ses <- getpc(new_data %>% select(logocc, loginc, perdeg, lognw), normalizeit = T, fillmissing=F, dofa=F)
new_data$altses <- getpc(new_data %>% select(loginc, lognw, logocc), normalizeit = T, fillmissing=F, dofa=F)
psych::reliability(new_data %>% select(loginc, lognw, logocc))
keys not specified, all items will be scored
Omega_h for 1 factor is not meaningful, just omega_t
Warning: Omega_h and Omega_asymptotic are not meaningful with one factor
Measures of reliability
psych::reliability(keys = new_data %>% select(loginc, lognw,
logocc))
omega_h alpha omega.tot Uni tau cong max.split min.split mean.r med.r n.items CFI ECV
All_items 0.71 0.71 0.71 0.99 0.99 1 0.66 0.58 0.45 0.42 3 1 0.99
GG_scatter(new_data, 'IQ', 'ses') + geom_smooth()
GG_scatter(new_data, 'IQ', 'altses') + geom_smooth()
##########################
new_data$ses2004 <- getpc(new_data %>% select(inc2004, highest_rank_job_2004, perdeg, nw2004), normalizeit = T, fillmissing=F, dofa=F)
cor.test(new_data$ses2004, new_data$IQ)
Pearson's product-moment correlation
data: new_data$ses2004 and new_data$IQ
t = 47.704, df = 5054, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.5378970 0.5759201
sample estimates:
cor
0.5572006
correlation_matrix(new_data %>% select(logocc, loginc, perdeg, lognw, ses, IQ, pses))
logocc loginc perdeg lognw ses IQ pses
logocc "NA" "0.527 ***" "0.588 ***" "0.417 ***" "0.838 ***" "0.558 ***" "0.387 ***"
loginc "0.527 ***" "NA" "0.417 ***" "0.404 ***" "0.773 ***" "0.495 ***" "0.266 ***"
perdeg "0.588 ***" "0.417 ***" "NA" "0.365 ***" "0.784 ***" "0.525 ***" "0.426 ***"
lognw "0.417 ***" "0.404 ***" "0.365 ***" "NA" "0.702 ***" "0.39 ***" "0.286 ***"
ses "0.838 ***" "0.773 ***" "0.784 ***" "0.702 ***" "NA" "0.641 ***" "0.45 ***"
IQ "0.558 ***" "0.495 ***" "0.525 ***" "0.39 ***" "0.641 ***" "NA" "0.488 ***"
pses "0.387 ***" "0.266 ***" "0.426 ***" "0.286 ***" "0.45 ***" "0.488 ***" "NA"
new_data$ses2004adj[!is.na(new_data$ses2004)] <- agecorrect('ses2004', agevectorname='age2024', datafr = new_data, normalizeit=T, splinex = 6)
new_data$highest_rank_job_2004adj[!is.na(new_data$highest_rank_job_2004)] <- agecorrect('highest_rank_job_2004', agevectorname='age2024', datafr = new_data, normalizeit=T, splinex = 6)
new_data$nw2004adj[!is.na(new_data$nw2004)] <- agecorrect('nw2004', agevectorname='age2024', datafr = new_data, normalizeit=T, splinex = 6)
new_data$inc2004adj[!is.na(new_data$inc2004)] <- agecorrect('inc2004', agevectorname='age2024', datafr = new_data, normalizeit=T, splinex = 6)
##############################
cor.test(new_data$inc2004adj, new_data$IQ)
Pearson's product-moment correlation
data: new_data$inc2004adj and new_data$IQ
t = 34.093, df = 6997, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.3571598 0.3973450
sample estimates:
cor
0.3774301
cor.test(log(new_data$inc2004adj - min(new_data$inc2004adj, na.rm=T) + .1), new_data$IQ)
Pearson's product-moment correlation
data: log(new_data$inc2004adj - min(new_data$inc2004adj, na.rm = T) + 0.1) and new_data$IQ
t = 36.915, df = 6997, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.3839461 0.4231684
sample estimates:
cor
0.4037427
new_data$loginc2004adj <- log(new_data$inc2004adj - min(new_data$inc2004adj, na.rm=T) + .1)
cor.test(new_data$nw2004adj, new_data$IQ)
Pearson's product-moment correlation
data: new_data$nw2004adj and new_data$IQ
t = 24.508, df = 6092, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.2765487 0.3222607
sample estimates:
cor
0.2995766
cor.test(log(new_data$nw2004adj - min(new_data$nw2004adj, na.rm=T) + .1), new_data$IQ)
Pearson's product-moment correlation
data: log(new_data$nw2004adj - min(new_data$nw2004adj, na.rm = T) + 0.1) and new_data$IQ
t = 31.125, df = 6092, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.3485462 0.3918761
sample estimates:
cor
0.3704126
new_data$lognw2004adj <- log(new_data$nw2004adj - min(new_data$nw2004adj, na.rm=T) + .1)
cor.test(new_data$highest_rank_job_2004adj, new_data$IQ)
Pearson's product-moment correlation
data: new_data$highest_rank_job_2004adj and new_data$IQ
t = 41.821, df = 6397, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.4438983 0.4823864
sample estimates:
cor
0.4633609
cor.test(log(new_data$highest_rank_job_2004adj - min(new_data$highest_rank_job_2004adj, na.rm=T) + .1), new_data$IQ)
Pearson's product-moment correlation
data: log(new_data$highest_rank_job_2004adj - min(new_data$highest_rank_job_2004adj, na.rm = T) + 0.1) and new_data$IQ
t = 45.756, df = 6397, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.4778761 0.5148028
sample estimates:
cor
0.4965641
new_data$loghighest_rank_job_2004adj <- log(new_data$highest_rank_job_2004adj - min(new_data$highest_rank_job_2004adj, na.rm=T) + .1)
new_data$ses2004 <- getpc(new_data %>% select(loginc2004adj, loghighest_rank_job_2004adj, perdeg, lognw2004adj), normalizeit = T, fillmissing=F, dofa=F)
cor.test(new_data$ses2004, new_data$IQ)
Pearson's product-moment correlation
data: new_data$ses2004 and new_data$IQ
t = 54.734, df = 5054, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.5924489 0.6270717
sample estimates:
cor
0.6100514
rs <- rt(n1 = cor.test(new_data$IQ, new_data$loginc2004adj)$parameter+2, n2 = cor.test(new_data$IQ, new_data$loginc)$parameter+2, r1 = cor.test(new_data$IQ, new_data$loginc)$estimate, r2 = cor.test(new_data$IQ, new_data$loginc2004adj)$estimate)
rs
[1] "z = -7.44128807390532p = 9.97082432477708e-14"
rs <- rt(n1 = cor.test(new_data$IQ, new_data$lognw)$parameter+2, n2 = cor.test(new_data$IQ, new_data$lognw2004adj)$parameter+2, r1 = cor.test(new_data$IQ, new_data$lognw)$estimate, r2 = cor.test(new_data$IQ, new_data$lognw2004adj)$estimate)
rs
[1] "z = -1.4389030616403p = 0.150177989969577"
rs <- rt(n1 = cor.test(new_data$IQ, new_data$logocc)$parameter+2, n2 = cor.test(new_data$IQ, new_data$loghighest_rank_job_2004adj)$parameter+2, r1 = cor.test(new_data$IQ, new_data$logocc)$estimate, r2 = cor.test(new_data$IQ, new_data$loghighest_rank_job_2004adj)$estimate)
rs
[1] "z = -5.30432441192795p = 1.13091038508095e-07"
rs <- rt(n1 = cor.test(new_data$IQ, new_data$ses)$parameter+2, n2 = cor.test(new_data$IQ, new_data$ses2004)$parameter+2, r1 = cor.test(new_data$IQ, new_data$ses)$estimate, r2 = cor.test(new_data$IQ, new_data$ses2004)$estimate)
rs
[1] "z = -2.92809828568133p = 0.00341042218925073"
###########
p <- pca(new_data %>% select(iqtests), rotate='none', nfactors=1)
p
Principal Components Analysis
Call: principal(r = r, nfactors = nfactors, residuals = residuals,
rotate = rotate, n.obs = n.obs, covar = covar, scores = scores,
missing = missing, impute = impute, oblique.scores = oblique.scores,
method = method, use = use, cor = cor, correct = 0.5, weight = NULL)
Standardized loadings (pattern matrix) based upon correlation matrix
PC1
SS loadings 6.60
Proportion Var 0.66
Mean item complexity = 1
Test of the hypothesis that 1 component is sufficient.
The root mean square of the residuals (RMSR) is 0.09
with the empirical chi square 10169.12 with prob < 0
Fit based upon off diagonal values = 0.98
debi <- data.frame(v = rep('', length(iqtests)), r = rep(0, length(iqtests)))
debi$v <- NA
i = 1
for(vec in iqtests) {
debi$v[i] <- vec
debi$r[i] <- cor.test(new_data[[vec]], new_data$ses)$estimate
i = i + 1
}
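For reference, the same per-subtest correlations with SES can be collected without the manual counter (equivalent under pairwise-complete observations):
debi <- data.frame(v = iqtests,
                   r = sapply(iqtests, function(v) cor(new_data[[v]], new_data$ses, use = "pairwise.complete.obs")))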
debi$v
[1] "GS" "AR" "WK" "PC" "NO" "CS" "AS" "MK" "MC" "EI"
debi$l <- p$loadings
debi$l <- as.numeric(debi$l)
fit2 <- lm(data=debi, r ~ l)
summary(fit2)
Call:
lm(formula = r ~ l, data = debi)
Residuals:
Min 1Q Median 3Q Max
-0.083712 -0.040816 -0.004141 0.032796 0.102505
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.004894 0.218436 -0.022 0.9827
l 0.646865 0.268801 2.406 0.0427 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.05928 on 8 degrees of freedom
Multiple R-squared: 0.4199, Adjusted R-squared: 0.3474
F-statistic: 5.791 on 1 and 8 DF, p-value: 0.04274
uzi3 <- seq(from=0.59, to=0.88, by=0.01)
uzi4 <- data.frame(l=uzi3)
uzi4$fit = predict(fit2, uzi4, interval = "confidence")
p <- ggplot(uzi4) +
geom_point(mapping = aes(x=l, y=r), data=debi) +
geom_line(data = uzi4, aes(x = l, y = fit[, 1]), color = "green", linewidth = 1) +
geom_ribbon(data = uzi4, aes(x = l, ymin = fit[, 2], ymax = fit[, 3]), alpha = 0.45) +
geom_text(data = debi, aes(x = l, y = r, label = v), vjust = -.44, size = 4) +
labs(title = "") +
xlab('g-loading') +
ylab('Correlation with SES') +
theme_bw() +
theme(
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
legend.position = "right",
plot.background = element_rect(fill = "white")
)
p
file_name <- paste0('output/jchart2.jpg')
ggsave(plot = p, filename = file_name, dpi = 420)
Saving 7.29 x 4.5 in image
lr <- lm(data=debi, r ~ l)
summary(lr)
Call:
lm(formula = r ~ l, data = debi)
Residuals:
Min 1Q Median 3Q Max
-0.083712 -0.040816 -0.004141 0.032796 0.102505
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.004894 0.218436 -0.022 0.9827
l 0.646865 0.268801 2.406 0.0427 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.05928 on 8 degrees of freedom
Multiple R-squared: 0.4199, Adjusted R-squared: 0.3474
F-statistic: 5.791 on 1 and 8 DF, p-value: 0.04274
0.70471-0.06328
[1] 0.64143
cor.test(debi$r, debi$l)
Pearson's product-moment correlation
data: debi$r and debi$l
t = 2.4065, df = 8, p-value = 0.04274
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.03105608 0.90740989
sample estimates:
cor
0.6480113
###########
lat0 <- "
#LATENTS
S =~ logocc + loginc + perdeg + lognw
G =~ GS+AR+WK+PC+NO+CS+AS+MK+MC+EI
S ~~ G
"
latfit1 <- sem(model = lat0, data=new_data)
summary(latfit1, fit.measures=T, standardize=T)
lavaan 0.6.17 ended normally after 40 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 29
Used Total
Number of observations 8931 12686
Model Test User Model:
Test statistic 15061.472
Degrees of freedom 76
P-value (Chi-square) 0.000
Model Test Baseline Model:
Test statistic 95101.955
Degrees of freedom 91
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.842
Tucker-Lewis Index (TLI) 0.811
Loglikelihood and Information Criteria:
Loglikelihood user model (H0) -117398.486
Loglikelihood unrestricted model (H1) -109867.750
Akaike (AIC) 234854.972
Bayesian (BIC) 235060.793
Sample-size adjusted Bayesian (SABIC) 234968.636
Root Mean Square Error of Approximation:
RMSEA 0.149
90 Percent confidence interval - lower 0.147
90 Percent confidence interval - upper 0.151
P-value H_0: RMSEA <= 0.050 0.000
P-value H_0: RMSEA >= 0.080 1.000
Standardized Root Mean Square Residual:
SRMR 0.066
Parameter Estimates:
Standard errors Standard
Information Expected
Information saturated (h1) model Structured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
S =~
logocc 1.000 0.354 0.795
loginc 1.319 0.022 61.179 0.000 0.467 0.682
perdeg 2.014 0.031 64.004 0.000 0.713 0.714
lognw 0.559 0.011 49.234 0.000 0.198 0.553
G =~
GS 1.000 0.886 0.883
AR 0.971 0.008 114.731 0.000 0.861 0.860
WK 0.994 0.008 121.813 0.000 0.881 0.885
PC 0.930 0.009 105.275 0.000 0.824 0.824
NO 0.768 0.010 77.699 0.000 0.681 0.687
CS 0.686 0.010 66.167 0.000 0.608 0.614
AS 0.780 0.010 79.323 0.000 0.692 0.697
MK 0.936 0.009 105.769 0.000 0.830 0.826
MC 0.871 0.009 93.979 0.000 0.772 0.774
EI 0.905 0.009 100.162 0.000 0.803 0.802
Covariances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
S ~~
G 0.231 0.005 46.436 0.000 0.737 0.737
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.logocc 0.073 0.002 43.543 0.000 0.073 0.368
.loginc 0.251 0.005 55.149 0.000 0.251 0.535
.perdeg 0.490 0.009 52.779 0.000 0.490 0.491
.lognw 0.089 0.001 60.996 0.000 0.089 0.694
.GS 0.223 0.004 55.337 0.000 0.223 0.221
.AR 0.260 0.005 57.568 0.000 0.260 0.260
.WK 0.215 0.004 55.089 0.000 0.215 0.217
.PC 0.322 0.005 59.971 0.000 0.322 0.321
.NO 0.517 0.008 63.932 0.000 0.517 0.528
.CS 0.611 0.009 64.871 0.000 0.611 0.623
.AS 0.507 0.008 63.775 0.000 0.507 0.514
.MK 0.321 0.005 59.865 0.000 0.321 0.318
.MC 0.399 0.006 61.991 0.000 0.399 0.401
.EI 0.357 0.006 60.976 0.000 0.357 0.356
S 0.125 0.003 41.263 0.000 1.000 1.000
G 0.786 0.015 52.772 0.000 1.000 1.000
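If only the latent S–G correlation is of interest, it can be pulled directly from lavaan's standardized solution; the filter below just keeps the S ~~ G row:
lavaan::standardizedSolution(latfit1) %>%
  dplyr::filter(op == "~~", lhs == "S", rhs == "G")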
#zero-order SES ~ IQ (g = IQ standardized to mean = 0 and SD = 1)
lr <- lm(data=new_data, ses ~ g)
summary(lr)
Call:
lm(formula = ses ~ g, data = new_data)
Residuals:
Min 1Q Median 3Q Max
-2.4315 -0.5209 -0.0672 0.4179 4.1110
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.008653 0.008148 1.062 0.288
g 0.645266 0.008168 78.999 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.77 on 8929 degrees of freedom
(3755 observations deleted due to missingness)
Multiple R-squared: 0.4114, Adjusted R-squared: 0.4113
F-statistic: 6241 on 1 and 8929 DF, p-value: < 2.2e-16
#zero-order SES ~ parental SES
lr <- lm(data=new_data, ses ~ pses)
summary(lr)
Call:
lm(formula = ses ~ pses, data = new_data)
Residuals:
Min 1Q Median 3Q Max
-3.1302 -0.6063 -0.1060 0.5005 4.8619
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.007534 0.009244 -0.815 0.415
pses 0.442662 0.009089 48.704 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.893 on 9332 degrees of freedom
(3352 observations deleted due to missingness)
Multiple R-squared: 0.2027, Adjusted R-squared: 0.2026
F-statistic: 2372 on 1 and 9332 DF, p-value: < 2.2e-16
#parental SES + IQ
lr <- lm(data=new_data, ses ~ pses + g)
summary(lr)
Call:
lm(formula = ses ~ pses + g, data = new_data)
Residuals:
Min 1Q Median 3Q Max
-2.3269 -0.5127 -0.0589 0.4253 3.8226
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.005443 0.007983 0.682 0.495
pses 0.175910 0.009035 19.470 <2e-16 ***
g 0.556830 0.009200 60.525 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.7542 on 8928 degrees of freedom
(3755 observations deleted due to missingness)
Multiple R-squared: 0.4354, Adjusted R-squared: 0.4352
F-statistic: 3442 on 2 and 8928 DF, p-value: < 2.2e-16
#parental SES + IQ + demographics
lr <- lm(data=new_data, ses ~ g + race + Female + pses)
summary(lr)
Call:
lm(formula = ses ~ g + race + Female + pses, data = new_data)
Residuals:
Min 1Q Median 3Q Max
-2.4379 -0.5018 -0.0595 0.4223 3.8383
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.000e-01 2.149e-02 9.304 < 2e-16 ***
g 5.847e-01 1.046e-02 55.920 < 2e-16 ***
race2 -1.558e-01 2.511e-02 -6.205 5.71e-10 ***
race3 -2.806e-01 2.424e-02 -11.579 < 2e-16 ***
Female 9.194e-05 1.603e-02 0.006 0.995
pses 2.052e-01 9.427e-03 21.769 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.7487 on 8925 degrees of freedom
(3755 observations deleted due to missingness)
Multiple R-squared: 0.4438, Adjusted R-squared: 0.4435
F-statistic: 1425 on 5 and 8925 DF, p-value: < 2.2e-16
lr <- lm(data=new_data, standardize(loginc) ~ standardize(g))
summary(lr)
Call:
lm(formula = standardize(loginc) ~ standardize(g), data = new_data)
Residuals:
Min 1Q Median 3Q Max
-3.5206 -0.5337 0.0652 0.5708 4.6372
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.032996 0.008407 3.925 8.74e-05 ***
standardize(g) 0.488271 0.008322 58.675 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8651 on 10605 degrees of freedom
(2079 observations deleted due to missingness)
Multiple R-squared: 0.2451, Adjusted R-squared: 0.245
F-statistic: 3443 on 1 and 10605 DF, p-value: < 2.2e-16
library(NlsyLinks)
# kinship links: keep pairs of Gen1 respondents who were housemates
nl <- NlsyLinks::Links79PairExpanded
nl <- nl %>% filter(RelationshipPath=='Gen1Housemates')
check <- nl %>% select('SubjectID_S1', 'SubjectID_S2')
# attach IQ and SES to the second member of each pair, then to the first
ndreduced <- new_data %>% select('X', 'IQ', 'ses')
temp <- full_join(ndreduced, nl, by = c("X" = "SubjectID_S2"))
temp2 <- temp %>% select('X', 'IQ', 'ses', 'R', 'RFull', 'SubjectID_S1', 'EverSharedHouse')
temp2$SubjectID_S2 <- temp2$X
# .x columns = subject 1, .y columns = subject 2; keep only matched pairs, one row per pair
kin <- full_join(new_data, temp2, by = c("X" = "SubjectID_S1")) %>% filter(!is.na(SubjectID_S2))
kin <- kin %>% select('IQ.x', 'ses.x', 'IQ.y', 'ses.y', 'EverSharedHouse', 'X', 'SubjectID_S2', 'RFull')
kin <- kin[!duplicated(kin$X), ]
# within-pair differences (subject 2 minus subject 1), only where both members are observed
kin$sesdiff[!is.na(kin$ses.x) & !is.na(kin$ses.y)] <- kin$ses.y[!is.na(kin$ses.x) & !is.na(kin$ses.y)] - kin$ses.x[!is.na(kin$ses.x) & !is.na(kin$ses.y)]
kin$iqdiff[!is.na(kin$IQ.x) & !is.na(kin$IQ.y)] <- kin$IQ.y[!is.na(kin$IQ.x) & !is.na(kin$IQ.y)] - kin$IQ.x[!is.na(kin$IQ.x) & !is.na(kin$IQ.y)]
hm <- kin %>% filter(EverSharedHouse==T)
lr <- lm(data=hm, normalise(sesdiff) ~ normalise(iqdiff))
summary(lr)
Call:
lm(formula = normalise(sesdiff) ~ normalise(iqdiff), data = hm)
Residuals:
Min 1Q Median 3Q Max
-3.6235 -0.5594 0.0180 0.5602 4.0649
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.006902 0.017719 -0.39 0.697
normalise(iqdiff) 0.418985 0.017837 23.49 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9102 on 2637 degrees of freedom
(1277 observations deleted due to missingness)
Multiple R-squared: 0.173, Adjusted R-squared: 0.1727
F-statistic: 551.8 on 1 and 2637 DF, p-value: < 2.2e-16
cor.test(hm$sesdiff, hm$iqdiff)
Pearson's product-moment correlation
data: hm$sesdiff and hm$iqdiff
t = 23.49, df = 2637, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.3839102 0.4470339
sample estimates:
cor
0.415973
fit2 <- lm(data=hm, sesdiff ~ iqdiff)
summary(fit2)
Call:
lm(formula = sesdiff ~ iqdiff, data = hm)
Residuals:
Min 1Q Median 3Q Max
-3.4709 -0.5358 0.0172 0.5366 3.8937
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.02877 0.01698 1.694 0.0903 .
iqdiff 0.03265 0.00139 23.490 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8719 on 2637 degrees of freedom
(1277 observations deleted due to missingness)
Multiple R-squared: 0.173, Adjusted R-squared: 0.1727
F-statistic: 551.8 on 1 and 2637 DF, p-value: < 2.2e-16
fit4 <- lm(data=hm, sesdiff ~ rcs(iqdiff, 5))
summary(fit4)
Call:
lm(formula = sesdiff ~ rcs(iqdiff, 5), data = hm)
Residuals:
Min 1Q Median 3Q Max
-3.4197 -0.5370 0.0195 0.5406 3.9100
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.207869 0.102551 -2.027 0.04277 *
rcs(iqdiff, 5)iqdiff 0.018865 0.005838 3.231 0.00125 **
rcs(iqdiff, 5)iqdiff' 0.062075 0.028682 2.164 0.03054 *
rcs(iqdiff, 5)iqdiff'' -0.361302 0.191395 -1.888 0.05917 .
rcs(iqdiff, 5)iqdiff''' 0.542226 0.323606 1.676 0.09394 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8713 on 2634 degrees of freedom
(1277 observations deleted due to missingness)
Multiple R-squared: 0.1751, Adjusted R-squared: 0.1738
F-statistic: 139.7 on 4 and 2634 DF, p-value: < 2.2e-16
anova(fit4, fit2)
Analysis of Variance Table
Model 1: sesdiff ~ rcs(iqdiff, 5)
Model 2: sesdiff ~ iqdiff
Res.Df RSS Df Sum of Sq F Pr(>F)
1 2634 1999.8
2 2637 2004.7 -3 -4.9019 2.1521 0.09167 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
uzi3 <- seq(from=-45, to=51, by=0.01)
uzi4 <- data.frame(iqdiff=uzi3)
uzi4$fit = predict(fit2, uzi4, interval = "confidence")
p <- ggplot(uzi4) +
geom_point(mapping = aes(x=iqdiff, y=sesdiff), data=hm) +
geom_line(data = uzi4, aes(x = iqdiff, y = fit[, 1]), color = "green", linewidth = 1) +
geom_ribbon(data = uzi4, aes(x = iqdiff, ymin = fit[, 2], ymax = fit[, 3]), alpha = 0.45) +
labs(title = "") +
xlab('Difference in IQ') +
ylab('Difference in SES') +
theme_minimal() +
theme(
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
legend.position = "right",
plot.background = element_rect(fill = "white")
)
p
file_name <- paste0('output/sibchart2.jpg')
ggsave(plot = p, filename = file_name, dpi = 420)
Saving 7.29 x 4.5 in image
lr <- lm(data=new_data, ses ~ rcs(IQ, 6))
lr2 <- lm(data=new_data, ses ~ IQ)
summary(lr)
Call:
lm(formula = ses ~ rcs(IQ, 6), data = new_data)
Residuals:
Min 1Q Median 3Q Max
-2.4032 -0.5080 -0.0667 0.4062 3.9184
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.142207 0.249439 -16.606 <2e-16 ***
rcs(IQ, 6)IQ 0.041877 0.003164 13.237 <2e-16 ***
rcs(IQ, 6)IQ' -0.024019 0.023365 -1.028 0.304
rcs(IQ, 6)IQ'' 0.080220 0.118811 0.675 0.500
rcs(IQ, 6)IQ''' -0.050834 0.237339 -0.214 0.830
rcs(IQ, 6)IQ'''' 0.099567 0.265365 0.375 0.708
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.766 on 8925 degrees of freedom
(3755 observations deleted due to missingness)
Multiple R-squared: 0.4179, Adjusted R-squared: 0.4175
F-statistic: 1281 on 5 and 8925 DF, p-value: < 2.2e-16
summary(lr2)
Call:
lm(formula = ses ~ IQ, data = new_data)
Residuals:
Min 1Q Median 3Q Max
-2.4315 -0.5209 -0.0672 0.4179 4.1110
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.2931188 0.0551108 -77.9 <2e-16 ***
IQ 0.0430177 0.0005445 79.0 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.77 on 8929 degrees of freedom
(3755 observations deleted due to missingness)
Multiple R-squared: 0.4114, Adjusted R-squared: 0.4113
F-statistic: 6241 on 1 and 8929 DF, p-value: < 2.2e-16
anova(lr, lr2)
Analysis of Variance Table
Model 1: ses ~ rcs(IQ, 6)
Model 2: ses ~ IQ
Res.Df RSS Df Sum of Sq F Pr(>F)
1 8925 5236.2
2 8929 5294.4 -4 -58.214 24.806 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
lr <- lm(data=new_data, logocc ~ rcs(IQ, 6))
lr2 <- lm(data=new_data, logocc ~ IQ)
summary(lr)
Call:
lm(formula = logocc ~ rcs(IQ, 6), data = new_data)
Residuals:
Min 1Q Median 3Q Max
-2.17760 -0.24831 0.00157 0.24468 1.56784
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.056981 0.113150 -9.341 <2e-16 ***
rcs(IQ, 6)IQ 0.016632 0.001437 11.572 <2e-16 ***
rcs(IQ, 6)IQ' 0.003292 0.010781 0.305 0.760
rcs(IQ, 6)IQ'' -0.040483 0.055338 -0.732 0.464
rcs(IQ, 6)IQ''' 0.088953 0.111603 0.797 0.425
rcs(IQ, 6)IQ'''' -0.010652 0.125848 -0.085 0.933
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.373 on 9496 degrees of freedom
(3184 observations deleted due to missingness)
Multiple R-squared: 0.3147, Adjusted R-squared: 0.3143
F-statistic: 872 on 5 and 9496 DF, p-value: < 2.2e-16
summary(lr2)
Call:
lm(formula = logocc ~ IQ, data = new_data)
Residuals:
Min 1Q Median 3Q Max
-2.1826 -0.2480 0.0033 0.2447 1.5715
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.0694809 0.0256604 -41.68 <2e-16 ***
IQ 0.0167121 0.0002547 65.61 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.3737 on 9500 degrees of freedom
(3184 observations deleted due to missingness)
Multiple R-squared: 0.3118, Adjusted R-squared: 0.3118
F-statistic: 4305 on 1 and 9500 DF, p-value: < 2.2e-16
anova(lr, lr2)
Analysis of Variance Table
Model 1: logocc ~ rcs(IQ, 6)
Model 2: logocc ~ IQ
Res.Df RSS Df Sum of Sq F Pr(>F)
1 9496 1321.2
2 9500 1326.7 -4 -5.45 9.7927 6.661e-08 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
lr <- lm(data=new_data, loginc ~ rcs(IQ, 6))
lr2 <- lm(data=new_data, loginc ~ IQ)
summary(lr)
Call:
lm(formula = loginc ~ rcs(IQ, 6), data = new_data)
Residuals:
Min 1Q Median 3Q Max
-2.6637 -0.3946 0.0468 0.4204 3.3906
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.004196 0.176103 -17.059 <2e-16 ***
rcs(IQ, 6)IQ 0.028225 0.002242 12.592 <2e-16 ***
rcs(IQ, 6)IQ' -0.015992 0.017075 -0.937 0.349
rcs(IQ, 6)IQ'' 0.013551 0.088207 0.154 0.878
rcs(IQ, 6)IQ''' 0.045719 0.178897 0.256 0.798
rcs(IQ, 6)IQ'''' 0.013960 0.202724 0.069 0.945
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.634 on 10601 degrees of freedom
(2079 observations deleted due to missingness)
Multiple R-squared: 0.2469, Adjusted R-squared: 0.2466
F-statistic: 695.1 on 5 and 10601 DF, p-value: < 2.2e-16
summary(lr2)
Call:
lm(formula = loginc ~ IQ, data = new_data)
Residuals:
Min 1Q Median 3Q Max
-2.5827 -0.3915 0.0478 0.4187 3.4018
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.665113 0.040903 -65.16 <2e-16 ***
IQ 0.023880 0.000407 58.67 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.6346 on 10605 degrees of freedom
(2079 observations deleted due to missingness)
Multiple R-squared: 0.2451, Adjusted R-squared: 0.245
F-statistic: 3443 on 1 and 10605 DF, p-value: < 2.2e-16
anova(lr, lr2)
Analysis of Variance Table
Model 1: loginc ~ rcs(IQ, 6)
Model 2: loginc ~ IQ
Res.Df RSS Df Sum of Sq F Pr(>F)
1 10601 4260.8
2 10605 4271.2 -4 -10.391 6.4634 3.433e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
lr <- lm(data=new_data, perdeg ~ rcs(IQ, 6))
lr2 <- lm(data=new_data, perdeg ~ IQ)
summary(lr)
Call:
lm(formula = perdeg ~ rcs(IQ, 6), data = new_data)
Residuals:
Min 1Q Median 3Q Max
-2.1054 -0.5445 -0.1256 0.3548 4.4014
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.106082 0.255656 -12.149 <2e-16 ***
rcs(IQ, 6)IQ 0.031394 0.003249 9.662 <2e-16 ***
rcs(IQ, 6)IQ' -0.038801 0.024335 -1.594 0.111
rcs(IQ, 6)IQ'' 0.133816 0.124308 1.076 0.282
rcs(IQ, 6)IQ''' -0.008385 0.248983 -0.034 0.973
rcs(IQ, 6)IQ'''' -0.113599 0.278695 -0.408 0.684
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8466 on 9822 degrees of freedom
(2858 observations deleted due to missingness)
Multiple R-squared: 0.2912, Adjusted R-squared: 0.2909
F-statistic: 807.2 on 5 and 9822 DF, p-value: < 2.2e-16
summary(lr2)
Call:
lm(formula = perdeg ~ IQ, data = new_data)
Residuals:
Min 1Q Median 3Q Max
-2.0021 -0.6245 -0.1631 0.4583 4.5080
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.5122852 0.0582896 -60.26 <2e-16 ***
IQ 0.0351671 0.0005754 61.11 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8558 on 9826 degrees of freedom
(2858 observations deleted due to missingness)
Multiple R-squared: 0.2754, Adjusted R-squared: 0.2753
F-statistic: 3735 on 1 and 9826 DF, p-value: < 2.2e-16
anova(lr, lr2)
Analysis of Variance Table
Model 1: perdeg ~ rcs(IQ, 6)
Model 2: perdeg ~ IQ
Res.Df RSS Df Sum of Sq F Pr(>F)
1 9822 7039.6
2 9826 7196.9 -4 -157.24 54.847 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
lr <- lm(data=new_data, lognw ~ rcs(IQ, 6))
lr2 <- lm(data=new_data, lognw ~ IQ)
summary(lr)
Call:
lm(formula = lognw ~ rcs(IQ, 6), data = new_data)
Residuals:
Min 1Q Median 3Q Max
-1.61453 -0.17640 -0.05390 0.06495 2.20073
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.1611876 0.0912449 -1.767 0.0773 .
rcs(IQ, 6)IQ 0.0049149 0.0011614 4.232 2.34e-05 ***
rcs(IQ, 6)IQ' 0.0090451 0.0088551 1.021 0.3071
rcs(IQ, 6)IQ'' -0.0009109 0.0457779 -0.020 0.9841
rcs(IQ, 6)IQ''' -0.0483892 0.0929119 -0.521 0.6025
rcs(IQ, 6)IQ'''' 0.0718870 0.1053273 0.683 0.4949
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.3279 on 10522 degrees of freedom
(2158 observations deleted due to missingness)
Multiple R-squared: 0.1557, Adjusted R-squared: 0.1553
F-statistic: 388 on 5 and 10522 DF, p-value: < 2.2e-16
summary(lr2)
Call:
lm(formula = lognw ~ IQ, data = new_data)
Residuals:
Min 1Q Median 3Q Max
-1.61901 -0.17918 -0.05981 0.06891 2.19263
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.5131905 0.0212335 -24.17 <2e-16 ***
IQ 0.0091897 0.0002113 43.48 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.3285 on 10526 degrees of freedom
(2158 observations deleted due to missingness)
Multiple R-squared: 0.1523, Adjusted R-squared: 0.1522
F-statistic: 1891 on 1 and 10526 DF, p-value: < 2.2e-16
anova(lr, lr2)
Analysis of Variance Table
Model 1: lognw ~ rcs(IQ, 6)
Model 2: lognw ~ IQ
Res.Df RSS Df Sum of Sq F Pr(>F)
1 10522 1131.6
2 10526 1136.2 -4 -4.5742 10.633 1.346e-08 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#########
cor.test(new_data$IQ, new_data$loginc)
Pearson's product-moment correlation
data: new_data$IQ and new_data$loginc
t = 58.675, df = 10605, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.4805469 0.5092834
sample estimates:
cor
0.4950505
cor.test(new_data$IQ, new_data$logocc)
Pearson's product-moment correlation
data: new_data$IQ and new_data$logocc
t = 65.611, df = 9500, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.5444238 0.5721015
sample estimates:
cor
0.558418
cor.test(new_data$IQ, new_data$perdeg)
Pearson's product-moment correlation
data: new_data$IQ and new_data$perdeg
t = 61.113, df = 9826, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.5103185 0.5389731
sample estimates:
cor
0.5247945
cor.test(new_data$IQ, new_data$lognw)
Pearson's product-moment correlation
data: new_data$IQ and new_data$lognw
t = 43.483, df = 10526, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.3739110 0.4062996
sample estimates:
cor
0.390226
cor.test(new_data$IQ, new_data$ses)
Pearson's product-moment correlation
data: new_data$IQ and new_data$ses
t = 78.999, df = 8929, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.6290287 0.6534483
sample estimates:
cor
0.6414009
psych::reliability(new_data %>% select(all_of(occ_columns)))
keys not specified, all items will be scored
Measures of reliability
psych::reliability(keys = new_data %>% select(occ_columns))
omega_h alpha omega.tot Uni tau cong max.split min.split mean.r med.r n.items CFI ECV
All_items 0.84 0.96 0.97 0.93 0.95 0.98 0.98 0.89 0.58 0.56 19 0.73 0.85
psych::reliability(new_data %>% select(all_of(income_columns)))
keys not specified, all items will be scored
Measures of reliability
psych::reliability(keys = new_data %>% select(income_columns))
omega_h alpha omega.tot Uni tau cong max.split min.split mean.r med.r n.items CFI ECV
All_items 0.74 0.96 0.97 0.87 0.9 0.97 0.98 0.85 0.55 0.59 20 0.73 0.82
psych::reliability(new_data %>% select(all_of(nw_columns)))
keys not specified, all items will be scored
In smc, smcs > 1 were set to 1.0 and smcs < 0 were set to 0 (repeated across several sub-analyses)
Warning: Matrix was not positive definite, smoothing was done
Warning: An ultra-Heywood case was detected. Examine the results carefully
Measures of reliability
psych::reliability(keys = new_data %>% select(nw_columns))
omega_h alpha omega.tot Uni tau cong max.split min.split mean.r med.r n.items CFI ECV
All_items 0.49 0.91 0.94 0.76 0.85 0.9 0.98 0.7 0.42 0.44 14 0.19 0.69
psych::reliability(new_data %>% select(logocc, loginc, perdeg, lognw))
keys not specified, all items will be scored
Measures of reliability
psych::reliability(keys = new_data %>% select(logocc, loginc,
perdeg, lognw))
omega_h alpha omega.tot Uni tau cong max.split min.split mean.r med.r n.items CFI ECV
All_items 0.67 0.77 0.81 0.97 0.97 1 0.8 0.73 0.45 0.42 4 0.98 0.91
psych::reliability(new_data %>% select(iqtests))
keys not specified, all items will be scored
Measures of reliability
psych::reliability(keys = new_data %>% select(iqtests))
omega_h alpha omega.tot Uni tau cong max.split min.split mean.r med.r n.items CFI ECV
All_items 0.74 0.94 0.96 0.94 0.96 0.98 0.97 0.87 0.62 0.63 10 0.83 0.85
psych::reliability(new_data %>% select(father_rank, mother_rank, R0006500, R0007900))
keys not specified, all items will be scored
Measures of reliability
psych::reliability(keys = new_data %>% select(father_rank, mother_rank,
R0006500, R0007900))
omega_h alpha omega.tot Uni tau cong max.split min.split mean.r med.r n.items CFI ECV
All_items 0.5 0.79 0.88 0.93 0.94 0.98 0.85 0.73 0.48 0.46 4 0.92 0.83
corrforatt(new_data, r1=0.95, r2=0.82, 'IQ', 'ses')
[1] "Corrected corr: 0.726709532148331"
[1] "Upper lim: 0.740359280441101"
[1] "Lower lim: 0.712691732024547"
corrforatt(new_data, r1=.88, r2=0.82, 'pses', 'ses')
[1] "Corrected corr: 0.529969265834988"
[1] "Upper lim: 0.548838905503682"
[1] "Lower lim: 0.510751768419127"
corrforatt(new_data, r1=0.95, r2=.94, 'IQ', 'lognw')
[1] "Corrected corr: 0.412943387110926"
[1] "Upper lim: 0.429952727144582"
[1] "Lower lim: 0.395678560934228"
corrforatt(new_data, r1=0.95, r2=.97, 'IQ', 'loginc')
[1] "Corrected corr: 0.515705578075031"
[1] "Upper lim: 0.530532277230285"
[1] "Lower lim: 0.500596848618003"
corrforatt(new_data, r1=0.95, r2=.88, 'IQ', 'perdeg')
[1] "Corrected corr: 0.573965947343408"
[1] "Upper lim: 0.58947309764094"
[1] "Lower lim: 0.558133630324956"