Homework 5.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(haven)
library(haven)
library(ggplot2)
# Read the Stata extract (labelled as ACS 2015 data in the plots below)
# directly from GitHub into a tibble.
acs <- read_dta(
  "https://github.com/coreysparks/data/blob/master/usa_00045.dta?raw=true"
)
Descriptive statistics for U.S.-born and foreign-born household heads.
# Household heads (relate == 1) split by birthplace code: bpl 1-120 is treated
# as U.S. born and bpl 121-998 as foreign born. Columns are referenced through
# dplyr's data mask (bare `bpl`) instead of `acs$bpl`, so the condition always
# follows the rows flowing through the pipe. The `usborn` label column is kept
# so each subset carries its group name.
usborn <- acs %>%
  filter(relate == 1, bpl %in% 1:120) %>%
  mutate(usborn = "US born")

foreignborn <- acs %>%
  filter(relate == 1, bpl %in% 121:998) %>%
  mutate(usborn = "Foreign born")
# Family-size summary for household heads: one row per birthplace group,
# built row-wise from the `usborn` and `foreignborn` subsets.
tribble(
  ~"Birth place of household head",
  ~"Mean family size",
  ~"Median family size",
  ~"Standard deviation",
  ~"Number of cases",
  # U.S.-born household heads
  "U.S. Born",
  mean(usborn$famsize),
  median(usborn$famsize),
  sd(usborn$famsize),
  length(usborn$famsize),
  # Foreign-born household heads
  "Foreign born",
  mean(foreignborn$famsize),
  median(foreignborn$famsize),
  sd(foreignborn$famsize),
  length(foreignborn$famsize)
)
## # A tibble: 2 x 5
## `Birth place of household head` `Mean family size` `Median family size`
## <chr> <dbl> <dbl>
## 1 U.S. Born 2.290301 2
## 2 Foreign born 2.934445 3
## # ... with 2 more variables: `Standard deviation` <dbl>, `Number of
## # cases` <int>
Histograms and boxplots.
# Histogram of family size for household heads (relate == 1), one panel per
# birthplace group (bpl 1-120 = "US born", 121-998 = "Foreign born").
acs %>%
  mutate(
    usborn = case_when(
      bpl %in% 1:120 ~ "US born",
      bpl %in% 121:998 ~ "Foreign born"
    )
  ) %>%
  filter(relate == 1) %>%
  ggplot() +
  geom_histogram(mapping = aes(famsize), binwidth = 0.5) +
  # famsize is numeric, so use a continuous scale: `breaks` puts a tick at
  # every second value and the labels line up with the actual family sizes.
  # The scale name doubles as the x-axis title.
  scale_x_continuous(
    "Number of own family members in household",
    breaks = seq(0, 20, by = 2)
  ) +
  facet_wrap(~usborn) +
  ggtitle(
    "Family size by birthplace of household head",
    subtitle = "Data from ACS 2015"
  ) +
  ylab("Number of households")

# Boxplot of family size for household heads (relate == 1) by birthplace
# group (bpl 1-120 = "US born", 121-998 = "Foreign born").
acs %>%
  mutate(
    usborn = case_when(
      bpl %in% 1:120 ~ "US born",
      bpl %in% 121:998 ~ "Foreign born"
    )
  ) %>%
  filter(relate == 1) %>%
  ggplot() +
  # No constant ymin/ymax here: they are boxplot aesthetics, and fixing them
  # would replace the whisker ends computed from the data in every group.
  geom_boxplot(aes(usborn, famsize)) +
  ggtitle(
    "Boxplot of family size by birthplace of household head",
    subtitle = "Data from ACS 2015"
  ) +
  xlab("Household head by birthplace") +
  ylab("Number of own family members in household")
## Don't know how to automatically pick scale for object of type labelled. Defaulting to continuous.

Do family sizes differ by the birthplace of the household head?
library(broom)
# Household heads (relate == 1) with a birthplace label derived from bpl
# (1-120 = "US born", 121-998 = "Foreign born"). `bpl` is referenced through
# the data mask rather than `acs$bpl`, so the recode always uses the rows
# currently in the pipe.
acs_hh <- acs %>%
  mutate(
    usborn = case_when(
      bpl %in% 1:120 ~ "US born",
      bpl %in% 121:998 ~ "Foreign born"
    )
  ) %>%
  filter(relate == 1)

# Linear model of family size on birthplace group; tidy() prints the
# coefficient table as a data frame.
hh <- lm(famsize ~ usborn, data = acs_hh)
tidy(hh)
## term estimate std.error statistic p.value
## 1 (Intercept) 2.9344448 0.01098588 267.11061 0
## 2 usbornUS born -0.6441438 0.01181550 -54.51685 0
# Welch two-sample t-test of family size between the two birthplace groups.
# The arguments below are t.test()'s defaults, written out for the reader.
t.test(
  acs_hh$famsize ~ acs_hh$usborn,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: acs_hh$famsize by acs_hh$usborn
## t = 46.032, df = 19138, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.6167158 0.6715719
## sample estimates:
## mean in group Foreign born mean in group US born
## 2.934445 2.290301
# Normal Q-Q plot of the studentized residuals from the `hh` model.
qqnorm(
  y = rstudent(hh),
  main = "Q-Q Plot for Model Residuals"
)
