library(haven)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Standard error of the mean: sd(x) / sqrt(n).
# x: numeric vector; missing values are ignored.
# Returns a single numeric value (NA when fewer than two non-missing values).
mySE <- function(x) {
  # Count only the observations sd() actually uses once NAs are removed.
  n <- sum(!is.na(x))
  sd(x, na.rm = TRUE) / sqrt(n)
}
# Read the Stata (.dta) file from GitHub into `ipums`.
ipums_url <- "https://github.com/coreysparks/data/blob/master/usa_00045.dta?raw=true"
ipums <- read_dta(ipums_url)
# Split the sample by birthplace code `bpl`.
# Both bounds are combined with `&` in ONE condition: the third positional
# argument of subset() is `select` (column selection), so a second
# comma-separated condition is never applied as a row filter.
myForeignHead <- subset(ipums, bpl >= 121 & bpl <= 998)
myUSHead <- subset(ipums, bpl >= 1 & bpl <= 120)

# Within each group keep only the household heads (relate == 1).
myForeignHeadR <- subset(myForeignHead, relate == 1)
myUSHeadR <- subset(myUSHead, relate == 1)
1) Descriptive Statistics and Graphs for Each Group (U.S.-Born vs. Foreign-Born Household Heads)
Creating a subset that keeps only the “Head of Household” record of each household (as done before):
# Household heads only: rows where relate == 1 (rows with NA are dropped).
onlyheads <- ipums[which(ipums$relate == 1), ]
Mean, standard deviation, and standard error of family size for U.S.-born and foreign-born household heads (from the previous homework):
# Household heads (relate == 1) labelled by birthplace group:
#   bpl 1-120   -> "US Born"
#   bpl 121-998 -> "Foreign Born"
#   anything else -> NA
# The two ranges no longer overlap at 120, and `bpl` is read from the piped
# data instead of `ipums$bpl`, so the step stays correct if rows are filtered
# or reordered earlier in the pipeline.
onlyheads <- ipums %>%
  filter(relate == 1) %>%
  mutate(
    bornus = case_when(
      bpl %in% 1:120 ~ "US Born",
      bpl %in% 121:998 ~ "Foreign Born"
    )
  )
# Mean, standard deviation and standard error of family size per
# birthplace group.
summarise(
  group_by(onlyheads, bornus),
  mean(famsize),
  sd(famsize),
  mySE(famsize)
)
## # A tibble: 2 x 4
## bornus `mean(famsize)` `sd(famsize)` `mySE(famsize)`
## <chr> <dbl> <dbl> <dbl>
## 1 Foreign Born 2.934445 1.683525 1.059154e-04
## 2 US Born 2.290301 1.332221 1.313672e-05
Histogram
# Histogram of family size for household heads, one panel per birthplace group.
ipums %>%
  filter(relate == 1) %>%
  mutate(
    # Non-overlapping birthplace ranges; `bpl` is taken from the piped data.
    bornus = case_when(
      bpl %in% 1:120 ~ "US Born",
      bpl %in% 121:998 ~ "Foreign Born"
    ),
    # Strip the haven labels so ggplot2 receives a plain numeric variable.
    famsize = as.numeric(famsize)
  ) %>%
  ggplot() +
  geom_histogram(mapping = aes(famsize), binwidth = .5) +
  # famsize is numeric, so a continuous scale with explicit breaks is used;
  # a discrete scale treats its `limits` as category names rather than as
  # positions on the numeric axis.
  scale_x_continuous("number of family members", breaks = seq(0, 20, by = 2)) +
  facet_wrap(~bornus) +
  ggtitle("Family Size by Birth Place") +
  # The original also called xlab("Size"), giving the x axis two competing
  # titles; only the scale title is kept.
  ylab("number of households")

Box Plot
# Box plot of family size for household heads, by birthplace group.
ipums %>%
  filter(relate == 1) %>%
  mutate(
    # Non-overlapping birthplace ranges; `bpl` is taken from the piped data.
    bornus = case_when(
      bpl %in% 1:120 ~ "US Born",
      bpl %in% 121:998 ~ "Foreign Born"
    ),
    # Strip the haven labels so ggplot2 can pick a continuous y scale
    # without the "Don't know how to automatically pick scale" message.
    famsize = as.numeric(famsize)
  ) %>%
  ggplot() +
  # NOTE(review): ymin/ymax are kept exactly as in the original. If the intent
  # was to restrict the visible y range, coord_cartesian(ylim = c(1, 20)) is
  # the usual way to do that -- confirm with the author.
  geom_boxplot(aes(bornus, famsize), ymin = 1, ymax = 20) +
  ggtitle("Family Size by Birth Place") +
  xlab("head by birthplace") +
  ylab("number of houshold members")
## Don't know how to automatically pick scale for object of type labelled. Defaulting to continuous.

2) Linear Model Test for Equality of the Family Size Variable
Summary using the Linear Model
# Regress family size on birthplace group and print the coefficient table.
library(broom)
myPlot1 <- lm(
  formula = famsize ~ bornus,
  data = onlyheads
)
tidy(x = myPlot1)
## term estimate std.error statistic p.value
## 1 (Intercept) 2.9344448 0.01098588 267.11061 0
## 2 bornusUS Born -0.6441438 0.01181550 -54.51685 0
Graph
# Normal Q-Q plot of the studentized residuals of myPlot1.
qqnorm(
  y = rstudent(myPlot1),
  main = "Q-Q Plot for Family Size"
)

Transformation using Logarithm
# Same model with log-transformed family size; print the coefficient table.
library(broom)
myPlot2 <- lm(
  formula = log(famsize) ~ bornus,
  data = onlyheads
)
tidy(x = myPlot2)
## term estimate std.error statistic p.value
## 1 (Intercept) 0.9053601 0.004402847 205.63063 0
## 2 bornusUS Born -0.2286906 0.004735337 -48.29448 0
Logarithmic Graph
# Normal Q-Q plot of the studentized residuals of myPlot2.
qqnorm(
  y = rstudent(myPlot2),
  main = "Q-Q Plot for Logarithmic Family Size Graph"
)

Square Rooted Graph
# Square-root transformation of family size.
# The model is fitted here because myPlot3 is not assigned anywhere earlier
# in the script, so the qqnorm() call would otherwise fail with
# "object 'myPlot3' not found".
# as.numeric() strips the haven labels before the transformation.
myPlot3 <- lm(sqrt(as.numeric(famsize)) ~ bornus, data = onlyheads)
qqnorm(rstudent(myPlot3), main = "Q-Q Plot for Square Rooted Family Size")

Reciprocal Graph
# Reciprocal transformation of family size.
# The model is fitted here because myPlot4 is not assigned anywhere earlier
# in the script, so the qqnorm() call would otherwise fail with
# "object 'myPlot4' not found".
# I() keeps `/` as arithmetic inside the formula; as.numeric() strips the
# haven labels before the division.
myPlot4 <- lm(I(1 / as.numeric(famsize)) ~ bornus, data = onlyheads)
qqnorm(rstudent(myPlot4), main = "Q-Q Plot for Reciprocal Family Size")
