library(haven)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(broom)
library(nortest)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ipumsr)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.3 v stringr 1.4.0
## v tidyr 1.1.1 v forcats 0.5.0
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x psych::%+%() masks ggplot2::%+%()
## x psych::alpha() masks ggplot2::alpha()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x MASS::select() masks dplyr::select()
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
##
## describe
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(knitr)
# Load the PSID extract once. The file location lives in a single variable so
# it only has to be changed in one place on another machine.
psid_path <- "C:/Users/chris/Downloads/psid_cds.dta"
Psid <- read_dta(psid_path)

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(Psid)
}

# Analysis sample: na.omit() drops every row that has an NA in any column.
newPsid <- na.omit(Psid)

# Recode tenure to a 0/1 indicator: 5 -> 0, 1 -> 1, anything else -> NA.
Psidrecode <- Psid %>%
  mutate(
    tenure = dplyr::case_when(
      tenure == 5 ~ 0,
      tenure == 1 ~ 1,
      TRUE ~ NA_real_
    )
  )
is.factor(Psid$crace3)
## [1] FALSE
# crace3 is stored as a number, not a factor (see the check above), so it is
# wrapped in factor() here. Without that, aov() fits a single linear slope
# across the race codes (1 Df) instead of comparing the group means.
mbic <- aov(cbmi ~ factor(crace3), data = newPsid)
anova(mbic)
# NOTE(review): the table below was printed by the earlier numeric-coded model
# (crace3 with 1 Df); re-run this chunk to refresh it.
## Analysis of Variance Table
##
## Response: cbmi
## Df Sum Sq Mean Sq F value Pr(>F)
## crace3 1 159 158.55 2.1148 0.146
## Residuals 3571 267719 74.97
# Two-sample t-test of cbmi across the two csex groups.
t.test(cbmi ~ csex, data = newPsid)
##
## Welch Two Sample t-test
##
## data: cbmi by csex
## t = 1.3577, df = 3568.9, p-value = 0.1746
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1745978 0.9609947
## sample estimates:
## mean in group 1 mean in group 2
## 16.55198 16.15878
References
# Five-number summary plus mean, then standard deviation, for each of the
# three predictors used in the first regression model.
summary(newPsid[["adjfinc"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 20675 42879 58285 74641 4824656
sd(newPsid[["adjfinc"]])
## [1] 118676.6
summary(newPsid[["educ"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 12.00 13.00 13.74 16.00 20.00
sd(newPsid[["educ"]])
## [1] 3.076806
summary(newPsid[["emp2"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 1.000 0.663 1.000 1.000
sd(newPsid[["emp2"]])
## [1] 0.4727413
# Model 1: linear regression of cbmi on adjfinc, educ and emp2.
fit <- lm(cbmi ~ adjfinc + educ + emp2, data = newPsid)
summary(fit)
##
## Call:
## lm(formula = cbmi ~ adjfinc + educ + emp2, data = newPsid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.189 -1.818 0.571 4.235 38.899
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.719e+01 6.730e-01 25.545 < 2e-16 ***
## adjfinc -4.292e-06 1.256e-06 -3.417 0.000639 ***
## educ -5.227e-02 4.912e-02 -1.064 0.287407
## emp2 1.943e-01 3.116e-01 0.624 0.532891
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.645 on 3569 degrees of freedom
## Multiple R-squared: 0.00426, Adjusted R-squared: 0.003423
## F-statistic: 5.089 on 3 and 3569 DF, p-value: 0.001625
# ANOVA table for Model 1 (fit); terms appear in formula order.
anova(fit)
## Analysis of Variance Table
##
## Response: cbmi
## Df Sum Sq Mean Sq F value Pr(>F)
## adjfinc 1 1041 1041.43 13.9346 0.0001922 ***
## educ 1 71 70.56 0.9442 0.3312718
## emp2 1 29 29.07 0.3890 0.5328906
## Residuals 3569 266737 74.74
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#For Maternal Employment - Compared to women who are not currently employed, women who are currently employed are expected to have children with a BMI that is 0.194 units higher, holding all other variables constant. The difference is not statistically significant: the p-value of 0.532891 is greater than 0.05.
In addition to the three family socioeconomic background variables you identified from 5), previous research suggests that several demographic variables are also important predictors of body mass index, including child’s age, sex, race, and low birth weight status.
# Descriptive statistics for cage in a single one-row summary table.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
summarise(
  newPsid,
  Min_value = min(cage, na.rm = TRUE),
  Max_value = max(cage, na.rm = TRUE),
  Mean_value = mean(cage, na.rm = TRUE),
  sd_value = sd(cage, na.rm = TRUE),
  Median_value = median(cage, na.rm = TRUE)
)
## # A tibble: 1 x 5
## Min_value Max_value Mean_value sd_value Median_value
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 17 8.41 4.54 8
# Summary and standard deviation for csex, crace3 and lbw.
# NOTE(review): these columns are later entered as factor() in fit2, so the
# means/SDs of their numeric codes may not be meaningful; consider table().
summary(newPsid[["csex"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 1.505 2.000 2.000
sd(newPsid[["csex"]])
## [1] 0.5000432
summary(newPsid[["crace3"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 1.000 1.476 2.000 3.000
sd(newPsid[["crace3"]])
## [1] 0.5870847
summary(newPsid[["lbw"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.08928 0.00000 1.00000
sd(newPsid[["lbw"]])
## [1] 0.2851884
# Model 2: Model 1's predictors plus csex, crace3, lbw and cage; the
# categorical codes are entered through factor() so each level gets its own
# coefficient.
fit2 <- lm(
  cbmi ~ factor(emp2) + factor(csex) + factor(crace3) + factor(lbw) +
    adjfinc + educ + cage,
  data = newPsid
)
summary(fit2)
##
## Call:
## lm(formula = cbmi ~ factor(emp2) + factor(csex) + factor(crace3) +
## factor(lbw) + adjfinc + educ + cage, data = newPsid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.836 -1.670 1.267 4.034 40.880
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.163e+01 7.433e-01 15.644 < 2e-16 ***
## factor(emp2)1 -4.339e-01 2.953e-01 -1.469 0.1418
## factor(csex)2 -1.290e-01 2.724e-01 -0.474 0.6358
## factor(crace3)2 1.804e-01 2.953e-01 0.611 0.5412
## factor(crace3)3 -1.699e+00 6.574e-01 -2.584 0.0098 **
## factor(lbw)1 -2.871e-02 4.799e-01 -0.060 0.9523
## adjfinc -5.814e-06 1.195e-06 -4.867 1.18e-06 ***
## educ -5.702e-03 4.705e-02 -0.121 0.9035
## cage 6.552e-01 3.041e-02 21.550 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.132 on 3564 degrees of freedom
## Multiple R-squared: 0.1202, Adjusted R-squared: 0.1182
## F-statistic: 60.86 on 8 and 3564 DF, p-value: < 2.2e-16
# F-test comparing Model 1 (fit) against the larger Model 2 (fit2).
anova(fit, fit2)
## Analysis of Variance Table
##
## Model 1: cbmi ~ adjfinc + educ + emp2
## Model 2: cbmi ~ factor(emp2) + factor(csex) + factor(crace3) + factor(lbw) +
## adjfinc + educ + cage
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 3569 266737
## 2 3564 235681 5 31055 93.924 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Diagnostics for Model 2 (fit2).
# First diagnostic plot on its own, then the full default set of plots.
plot(fit2, which = 1)
plot(fit2)
# Breusch-Pagan test on fit2.
bptest(fit2)
##
## studentized Breusch-Pagan test
##
## data: fit2
## BP = 113.7, df = 8, p-value < 2.2e-16
# Anderson-Darling normality test on the model residuals.
ad.test(resid(fit2))
##
## Anderson-Darling normality test
##
## data: resid(fit2)
## A = 146.32, p-value < 2.2e-16