Cleaning up the IPUMS data:
# Build the analysis sample from the raw `ipums` extract and attach a combined
# ethnicity/race label (`ethicsremap`).
#
# Row filters -- every condition must hold, and rows where a condition is NA
# are dropped, exactly as with a chain of separate filter() calls:
#   * inctot / incwage: strictly positive total income, and below the large
#     sentinel values (presumably IPUMS N/A or missing codes -- confirm
#     against the codebook).
#   * poverty, hwsei: strictly positive values only.
#   * educ > 2: per the value labels printed later in this file, codes 0-2
#     run from "n/a or no schooling" to "grade 5, 6, 7, or 8".
#   * empstat 1-2 (presumably employed / unemployed -- confirm).
#   * age 17-64.
#   * fertyr 1-2 ("no" / "yes"); 0 is "n/a" and 8 is "suppressed".
#     NOTE(review): the correlation output later in this file reports a zero
#     standard deviation for sex, which suggests this filter leaves a single
#     sex in the sample -- confirm that this is intended.
#
# Columns are referenced by bare name rather than `.$col`: `.$col` reaches
# around dplyr's data mask to the whole piped data frame, which silently
# ignores grouping and does not work with the native pipe.
newpums <- ipums %>%
  filter(
    inctot > 0, inctot < 9999999,
    incwage < 9999998,
    poverty > 0,
    educ > 2,
    empstat > 0, empstat < 3,
    hwsei > 0,
    age > 16, age < 65,
    fertyr > 0, fertyr < 3
  ) %>%
  mutate(
    # Any Hispanic origin code 1-4 takes precedence over race; otherwise the
    # label follows the race code. Rows matching no rule get NA.
    ethicsremap = case_when(
      hispan %in% 1:4 & race %in% 1:9 ~ "Hispanic",
      hispan == 0 & race == 1 ~ "White",
      hispan == 0 & race == 2 ~ "Black",
      hispan == 0 & race == 3 ~ "American/Alaskan Native",
      hispan == 0 & race == 4 ~ "Chinese",
      hispan == 0 & race == 5 ~ "Japanese",
      hispan == 0 & race == 6 ~ "Other Asian or Pacific Islander",
      hispan == 0 & race == 7 ~ "Other Race",
      hispan == 0 & race %in% 8:9 ~ "Two or more Races",
      hispan == 9 ~ "Missing"
    )
  )
Multiple Regression Model with continuous and ordinal predictors:
# Linear model of total income on standardized educ, hwsei and incwage,
# followed by its coefficient table and fit statistics.
myfit <- lm(
  inctot ~ scale(educ) + scale(hwsei) + scale(incwage),
  data = newpums
)
summary(myfit)
##
## Call:
## lm(formula = inctot ~ scale(educ) + scale(hwsei) + scale(incwage),
## data = newpums)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14578 -3303 -2113 -1464 440673
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38668.67 75.69 510.874 < 2e-16 ***
## scale(educ) 849.21 97.93 8.672 < 2e-16 ***
## scale(hwsei) 471.40 99.72 4.727 2.28e-06 ***
## scale(incwage) 40775.85 85.16 478.792 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15440 on 41619 degrees of freedom
## Multiple R-squared: 0.8775, Adjusted R-squared: 0.8775
## F-statistic: 9.936e+04 on 3 and 41619 DF, p-value: < 2.2e-16
Examining correlation among predictors:
# Correlation matrix of the income, status, education, sex and poverty
# columns, using pairwise-complete observations for each pair.
# NOTE(review): the output below warns that a standard deviation is zero and
# shows NA for every sex entry, i.e. sex does not vary in newpums -- confirm
# whether it belongs in this matrix.
cor(
  newpums[, c("inctot", "incwage", "hwsei", "educ", "sex", "poverty")],
  use = "pairwise.complete.obs"
)
## Warning in cor(newpums[, c("inctot", "incwage", "hwsei", "educ", "sex", :
## the standard deviation is zero
## inctot incwage hwsei educ sex poverty
## inctot 1.0000000 0.9364237 0.4190579 0.3884426 NA 0.4588780
## incwage 0.9364237 1.0000000 0.4289368 0.3922813 NA 0.4593886
## hwsei 0.4190579 0.4289368 1.0000000 0.6187563 NA 0.4123490
## educ 0.3884426 0.3922813 0.6187563 1.0000000 NA 0.4057191
## sex NA NA NA NA NA NA
## poverty 0.4588780 0.4593886 0.4123490 0.4057191 NA 1.0000000
Testing for collinearity problems:
# Variance inflation factor for each predictor of myfit. vif() is not part of
# base R, so the package that provides it must already be attached (the
# library() call is not visible in this part of the file).
vif(myfit)
## scale(educ) scale(hwsei) scale(incwage)
## 1.673880 1.735629 1.265935
All okay: every VIF value is below 2.
Creating a continuous outcome variable:
# Composite score: the sum of the z-scores of total income, wage income and
# hwsei. scale() returns a one-column matrix carrying "scaled:center" /
# "scaled:scale" attributes, so the sum is converted to a plain numeric vector
# before it is stored; a matrix-valued column is awkward for later printing,
# subsetting and dplyr verbs. The numeric values are unchanged.
newpums$FinancialStability <- as.numeric(
  scale(newpums$inctot) + scale(newpums$incwage) + scale(newpums$hwsei)
)
Creating predictors:
# Linear model with educ as the response and the composite score plus the
# standardized income / hwsei terms as regressors, then its summary.
# NOTE(review): FinancialStability is the sum of scale(inctot),
# scale(incwage) and scale(hwsei), so the four regressors are linearly
# dependent; lm() drops the last one, which is why scale(incwage) is NA in
# the summary. Confirm the intended specification.
mypred1 <- lm(
  educ ~ FinancialStability + scale(inctot) + scale(hwsei) + scale(incwage),
  data = newpums
)
summary(mypred1)
##
## Call:
## lm(formula = educ ~ FinancialStability + scale(inctot) + scale(hwsei) +
## scale(incwage), data = newpums)
##
## Residuals:
## <Labelled double>
## Min 1Q Median 3Q Max
## -7.0973 -1.0370 -0.0422 1.1176 4.9422
##
## Labels:
## value label
## 0 n/a or no schooling
## 1 nursery school to grade 4
## 2 grade 5, 6, 7, or 8
## 3 grade 9
## 4 grade 10
## 5 grade 11
## 6 grade 12
## 7 1 year of college
## 8 2 years of college
## 9 3 years of college
## 10 4 years of college
## 11 5+ years of college
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.056147 0.007731 1042.034 < 2e-16 ***
## FinancialStability 0.140111 0.022181 6.317 2.7e-10 ***
## scale(inctot) 0.051253 0.043383 1.181 0.237
## scale(hwsei) 0.983374 0.024679 39.847 < 2e-16 ***
## scale(incwage) NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.577 on 41619 degrees of freedom
## Multiple R-squared: 0.4037, Adjusted R-squared: 0.4036
## F-statistic: 9391 on 3 and 41619 DF, p-value: < 2.2e-16
# Same regressor set as the other mypred models, with sex as the response.
# NOTE(review): the summary below shows an intercept of exactly 2 and
# residuals of order 1e-14, which suggests sex is constant in newpums and the
# fit is degenerate -- confirm. The regressors are also linearly dependent
# (FinancialStability is the sum of the three scaled terms), hence the NA
# coefficient for scale(incwage).
mypred2 <- lm(
  sex ~ FinancialStability + scale(inctot) + scale(hwsei) + scale(incwage),
  data = newpums
)
summary(mypred2)
##
## Call:
## lm(formula = sex ~ FinancialStability + scale(inctot) + scale(hwsei) +
## scale(incwage), data = newpums)
##
## Residuals:
## <Labelled double>
## Min 1Q Median 3Q Max
## -1.00000e-14 -5.00000e-15 -4.00000e-15 -2.00000e-15 1.43147e-10
##
## Labels:
## value label
## 1 male
## 2 female
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.000e+00 3.439e-15 5.815e+14 <2e-16 ***
## FinancialStability 1.655e-15 9.867e-15 1.680e-01 0.867
## scale(inctot) -3.045e-15 1.930e-14 -1.580e-01 0.875
## scale(hwsei) -3.463e-15 1.098e-14 -3.150e-01 0.752
## scale(incwage) NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.017e-13 on 41619 degrees of freedom
## Multiple R-squared: 0.5, Adjusted R-squared: 0.5
## F-statistic: 1.387e+04 on 3 and 41619 DF, p-value: < 2.2e-16
# Same regressor set as the other mypred models, with age as the response.
# NOTE(review): the regressors are linearly dependent (FinancialStability is
# the sum of the three scaled terms), hence the NA coefficient for
# scale(incwage) in the summary.
mypred3 <- lm(
  age ~ FinancialStability + scale(inctot) + scale(hwsei) + scale(incwage),
  data = newpums
)
summary(mypred3)
##
## Call:
## lm(formula = age ~ FinancialStability + scale(inctot) + scale(hwsei) +
## scale(incwage), data = newpums)
##
## Residuals:
## <Labelled double>
## Min 1Q Median 3Q Max
## -45.290 -7.901 -0.454 7.779 18.451
##
## Labels:
## value label
## 0 less than 1 year old
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 7 7
## 8 8
## 9 9
## 10 10
## 11 11
## 12 12
## 13 13
## 14 14
## 15 15
## 16 16
## 17 17
## 18 18
## 19 19
## 20 20
## 21 21
## 22 22
## 23 23
## 24 24
## 25 25
## 26 26
## 27 27
## 28 28
## 29 29
## 30 30
## 31 31
## 32 32
## 33 33
## 34 34
## 35 35
## 36 36
## 37 37
## 38 38
## 39 39
## 40 40
## 41 41
## 42 42
## 43 43
## 44 44
## 45 45
## 46 46
## 47 47
## 48 48
## 49 49
## 50 50
## 51 51
## 52 52
## 53 53
## 54 54
## 55 55
## 56 56
## 57 57
## 58 58
## 59 59
## 60 60
## 61 61
## 62 62
## 63 63
## 64 64
## 65 65
## 66 66
## 67 67
## 68 68
## 69 69
## 70 70
## 71 71
## 72 72
## 73 73
## 74 74
## 75 75
## 76 76
## 77 77
## 78 78
## 79 79
## 80 80
## 81 81
## 82 82
## 83 83
## 84 84
## 85 85
## 86 86
## 87 87
## 88 88
## 89 89
## 90 90 (90+ in 1980 and 1990)
## 91 91
## 92 92
## 93 93
## 94 94
## 95 95
## 96 96
## 97 97
## 98 98
## 99 99
## 100 100 (100+ in 1960-1970)
## 101 101
## 102 102
## 103 103
## 104 104
## 105 105
## 106 106
## 107 107
## 108 108
## 109 109
## 110 110
## 111 111
## 112 112 (112+ in the 1980 internal data)
## 113 113
## 114 114
## 115 115 (115+ in the 1990 internal data)
## 116 116
## 117 117
## 118 118
## 119 119
## 120 120
## 121 121
## 122 122
## 123 123
## 124 124
## 125 125
## 126 126
## 129 129
## 130 130
## 135 135
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.80059 0.04428 785.939 < 2e-16 ***
## FinancialStability -0.12072 0.12704 -0.950 0.342
## scale(inctot) 2.52516 0.24847 10.163 < 2e-16 ***
## scale(hwsei) 0.90535 0.14134 6.405 1.52e-10 ***
## scale(incwage) NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.034 on 41619 degrees of freedom
## Multiple R-squared: 0.08281, Adjusted R-squared: 0.08275
## F-statistic: 1253 on 3 and 41619 DF, p-value: < 2.2e-16
# Same regressor set as the other mypred models, with nchild as the response.
# NOTE(review): the regressors are linearly dependent (FinancialStability is
# the sum of the three scaled terms), hence the NA coefficient for
# scale(incwage) in the summary.
mypred4 <- lm(
  nchild ~ FinancialStability + scale(inctot) + scale(hwsei) + scale(incwage),
  data = newpums
)
summary(mypred4)
##
## Call:
## lm(formula = nchild ~ FinancialStability + scale(inctot) + scale(hwsei) +
## scale(incwage), data = newpums)
##
## Residuals:
## <Labelled double>
## Min 1Q Median 3Q Max
## -2.3053 -0.9508 -0.0628 0.9905 8.0865
##
## Labels:
## value label
## 0 0 children present
## 1 1 child present
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 7 7
## 8 8
## 9 9+
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.976599 0.005679 171.960 < 2e-16 ***
## FinancialStability -0.059010 0.016294 -3.622 0.000293 ***
## scale(inctot) 0.176158 0.031869 5.528 3.27e-08 ***
## scale(hwsei) 0.082842 0.018129 4.570 4.90e-06 ***
## scale(incwage) NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.159 on 41619 degrees of freedom
## Multiple R-squared: 0.00442, Adjusted R-squared: 0.004349
## F-statistic: 61.6 on 3 and 41619 DF, p-value: < 2.2e-16
# Same regressor set as the other mypred models, with fertyr as the response.
# After the earlier filtering fertyr only takes the codes 1 ("no") and
# 2 ("yes").
# NOTE(review): a linear model on a two-valued code may not be the intended
# model here -- confirm. The regressors are also linearly dependent
# (FinancialStability is the sum of the three scaled terms), hence the NA
# coefficient for scale(incwage) in the summary.
mypred5 <- lm(
  fertyr ~ FinancialStability + scale(inctot) + scale(hwsei) + scale(incwage),
  data = newpums
)
summary(mypred5)
##
## Call:
## lm(formula = fertyr ~ FinancialStability + scale(inctot) + scale(hwsei) +
## scale(incwage), data = newpums)
##
## Residuals:
## <Labelled double>
## Min 1Q Median 3Q Max
## -0.05925 -0.04815 -0.04487 -0.04306 1.00863
##
## Labels:
## value label
## 0 n/a
## 1 no
## 2 yes
## 8 suppressed
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0457680 0.0010242 1021.060 <2e-16 ***
## FinancialStability 0.0006212 0.0029384 0.211 0.833
## scale(inctot) -0.0042335 0.0057473 -0.737 0.461
## scale(hwsei) 0.0034515 0.0032694 1.056 0.291
## scale(incwage) NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.209 on 41619 degrees of freedom
## Multiple R-squared: 0.0003585, Adjusted R-squared: 0.0002865
## F-statistic: 4.976 on 3 and 41619 DF, p-value: 0.001882
F-test:
# Sequential F-tests for the terms of mypred1.
# anova() can only compare models fitted to the same response. mypred2 to
# mypred5 have different responses (sex, age, nchild, fertyr), so passing
# them made R discard all four with a warning and tabulate mypred1 alone.
# Calling anova() on mypred1 directly gives that same table without the
# warning.
anova(mypred1)
## Analysis of Variance Table
##
## Response: educ
## Df Sum Sq Mean Sq F value Pr(>F)
## FinancialStability 1 51769 51769 20808.7 < 2.2e-16 ***
## scale(inctot) 1 14369 14369 5775.6 < 2.2e-16 ***
## scale(hwsei) 1 3950 3950 1587.8 < 2.2e-16 ***
## Residuals 41619 103542 2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Collect the AIC of each mypred model and add each model's difference from
# the first one (mypred1), then print the table.
# NOTE(review): these five models have different response variables, so
# their AIC values are on different likelihood scales and the differences
# are not a valid basis for model comparison -- confirm what this table is
# meant to show.
AICs <- AIC(mypred1, mypred2, mypred3, mypred4, mypred5)
AICs[["diff"]] <- AICs[["AIC"]] - AICs[["AIC"]][1]
AICs
## df AIC diff
## mypred1 5 156062.94 0.0
## mypred2 5 -2211536.77 -2367599.7
## mypred3 5 301348.01 145285.1
## mypred4 5 130386.04 -25676.9
## mypred5 5 -12206.39 -168269.3