library(haven)
library(readr)
# Read the IPUMS extract (Stata .dta format) straight from GitHub with haven.
ipums <- read_dta(
  "https://github.com/coreysparks/data/blob/master/usa_00045.dta?raw=true"
)

# Keep only the rows where `relate` equals 1 (presumably household heads, per
# the object name). `which()` drops rows whose comparison is NA.
onlyheads <- ipums[which(ipums$relate == 1), ]
Or, as in 2), with a dplyr pipeline:
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Histogram of `famsize` for the rows where `relate` equals 1.
# The bin count is left at the ggplot2 default.
ipums %>%
  filter(relate == 1) %>%
  ggplot(mapping = aes(x = famsize)) +
  geom_histogram()
## Don't know how to automatically pick scale for object of type labelled. Defaulting to continuous.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Average and standard deviation of family size, by birthplace group
# Mean and standard deviation of `famsize` for rows with relate == 1,
# grouped by a two-level birthplace indicator derived from `bpl`.
ipums %>%
  mutate(
    # Use the piped `bpl` column (data masking) instead of the global
    # `ipums$bpl`, so the step still works if rows are filtered upstream.
    # The ranges no longer overlap: code 120 stays "US Born", exactly as the
    # first-match rule of case_when() classified it before. Codes outside
    # 1-998 become NA.
    bornus = case_when(
      bpl %in% 1:120 ~ "US Born",
      bpl %in% 121:998 ~ "Foreign Born"
    )
  ) %>%
  filter(relate == 1) %>%
  group_by(bornus) %>%
  # Summary columns are left unnamed so the printed column headers stay
  # `mean(famsize)` and `sd(famsize)`.
  summarise(mean(famsize), sd(famsize))
## # A tibble: 2 x 3
## bornus `mean(famsize)` `sd(famsize)`
## <chr> <dbl> <dbl>
## 1 Foreign Born 2.934445 1.683525
## 2 US Born 2.290301 1.332221
Histogram
# Histogram of family size for rows with relate == 1, one panel per
# birthplace group.
ipums %>%
  mutate(
    # Refer to the piped column directly; ranges are non-overlapping
    # (code 120 stays "US Born", as before).
    bornus = case_when(
      bpl %in% 1:120 ~ "US Born",
      bpl %in% 121:998 ~ "Foreign Born"
    ),
    # Strip the haven labels so ggplot2 gets a plain numeric vector.
    famsize = as.numeric(famsize)
  ) %>%
  filter(relate == 1) %>%
  ggplot() +
  geom_histogram(mapping = aes(x = famsize), binwidth = 0.5) +
  # famsize is numeric, so use a continuous scale with explicit tick marks
  # rather than a discrete scale given numeric limits. The scale name is
  # the x-axis title, so the separate xlab() call was dropped.
  scale_x_continuous(
    "number of family members",
    breaks = seq(0, 20, by = 2)
  ) +
  facet_wrap(~bornus) +
  ggtitle("Family Size by Birth Place") +
  ylab("number of households")
Box Plot
# Box plot of family size by birthplace group for rows with relate == 1.
ipums %>%
  mutate(
    # Refer to the piped column directly; ranges are non-overlapping
    # (code 120 stays "US Born", as before).
    bornus = case_when(
      bpl %in% 1:120 ~ "US Born",
      bpl %in% 121:998 ~ "Foreign Born"
    )
  ) %>%
  filter(relate == 1) %>%
  ggplot() +
  # ymin/ymax passed directly to geom_boxplot() are constant aesthetics that
  # overwrite the computed whisker ends, so they were removed here.
  geom_boxplot(aes(x = bornus, y = famsize)) +
  # Restrict the visible y range to 1-20 without dropping any observations
  # from the box statistics.
  coord_cartesian(ylim = c(1, 20)) +
  ggtitle("Family Size by Birth Place") +
  xlab("head by birthplace") +
  ylab("number of household members")
## Don't know how to automatically pick scale for object of type labelled. Defaulting to continuous.
# Scatter plot of mean family size at each age for rows with relate == 1,
# with a smoothed trend line on top.
# The former `bornus` mutate was removed: summarise() discarded that column,
# so it never affected the plot.
ipums %>%
  filter(relate == 1) %>%
  # Strip the haven labels so age is a plain numeric for the continuous scale.
  mutate(age = as.numeric(age)) %>%
  group_by(age) %>%
  summarise(avsize = mean(famsize)) %>%
  ggplot(mapping = aes(x = age, y = avsize)) +
  geom_point(size = 0.9) +
  # Smoothing method is left at the ggplot2 default.
  geom_smooth() +
  # Age is numeric, so use a continuous scale with ticks every 5 years. The
  # scale name is the x-axis title, so the separate xlab() call was dropped.
  scale_x_continuous("Age", breaks = seq(15, 105, by = 5)) +
  ggtitle("Average Family Size by Age of Household Head") +
  ylab("average number of household members")
## `geom_smooth()` using method = 'loess'