# Install dplyr from CRAN. The name must be spelled exactly: a misspelled
# package name only produces a "not available" warning, not an error, so the
# script would carry on without the package having been installed.
install.packages("dplyr", repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/Uriel/Documents/R/win-library/3.4'
## (as 'lib' is unspecified)
install.packages("ggplot2", repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/Uriel/Documents/R/win-library/3.4'
## (as 'lib' is unspecified)
## package 'ggplot2' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Uriel\AppData\Local\Temp\Rtmpe46sfR\downloaded_packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(haven)
library(ggplot2)
# Read the ACS extract (a Stata .dta file) directly from its GitHub URL
# into `acs`, the data frame used by every step below.
acs <- read_dta(
  file = "https://github.com/coreysparks/data/blob/master/usa_00045.dta?raw=true"
)
1) Filter the data to include only household heads and create a histogram of family size.
# Histogram of family size with one observation per household
# (rows where relate == 1, i.e. the household head).
acs %>%
  filter(relate == 1) %>%
  ggplot(aes(x = famsize)) +
  # Only `binwidth` is supplied: when both `bins` and `binwidth` are given,
  # `binwidth` takes precedence, so the former `bins = 20` had no effect.
  geom_histogram(binwidth = 0.5) +
  # famsize is numeric, so a continuous scale is the right one; `breaks`
  # places a labelled tick at every family size from 1 to 20 without
  # dropping any data. The scale name also serves as the axis title.
  scale_x_continuous(
    "Number of own family members in household",
    breaks = 1:20
  ) +
  ggtitle("Family size", "Data from ACS 2015") +
  ylab("Number of Households")

2) Estimate the average family size for families whose head was born outside the US and for those whose head was born in the US.
# Mean, median, standard deviation and count of family size for household
# heads (relate == 1), split by whether the head was born in the US.
acs %>%
  filter(relate == 1) %>%
  mutate(
    # Refer to `bpl` as a column of the piped data rather than `acs$bpl`:
    # the `$` form reads the full, unfiltered table and breaks as soon as any
    # earlier step changes the number of rows.
    # bpl codes 1-120 are labelled "US born", 121-998 "Foreign born";
    # any other code is left as NA.
    usborn = case_when(
      bpl %in% 1:120 ~ "US born",
      bpl %in% 121:998 ~ "Foreign born"
    )
  ) %>%
  group_by(usborn) %>%
  summarise(mean(famsize), median(famsize), sd(famsize), n())
## # A tibble: 2 x 5
## usborn `mean(famsize)` `median(famsize)` `sd(famsize)` `n()`
## <chr> <dbl> <dbl+lbl> <dbl> <int>
## 1 Foreign born 2.934445 3 1.683525 15895
## 2 US born 2.290301 2 1.332221 101412
3) Create the histogram from part 1 separately for US-born and foreign-born household heads, as well as a box-and-whisker plot for the two groups.
Histogram
# Histogram of family size for household heads (relate == 1), one panel per
# birthplace group of the head.
acs %>%
  filter(relate == 1) %>%
  mutate(
    # Use the piped `bpl` column (not `acs$bpl`) so the recode always matches
    # the rows currently in the pipeline.
    # bpl codes 1-120 -> "US born", 121-998 -> "Foreign born", otherwise NA.
    usborn = case_when(
      bpl %in% 1:120 ~ "US born",
      bpl %in% 121:998 ~ "Foreign born"
    )
  ) %>%
  # No group_by() here: the panels come from facet_wrap(), not from dplyr
  # grouping.
  ggplot(aes(x = famsize)) +
  geom_histogram(binwidth = 0.5) +
  # famsize is numeric, so use a continuous scale; `breaks` puts a labelled
  # tick at every second value from 0 to 20. The scale name is the axis title.
  scale_x_continuous(
    "Number of own family members in household",
    breaks = seq(0, 20, by = 2)
  ) +
  facet_wrap(~usborn) +
  ggtitle("Family size by birthplace of household head", "Data from ACS 2015") +
  ylab("Number of households")

Boxplot
# Box-and-whisker plot of family size for household heads (relate == 1),
# comparing US-born and foreign-born heads.
acs %>%
  filter(relate == 1) %>%
  mutate(
    # Use the piped `bpl` column (not `acs$bpl`) so the recode always matches
    # the rows currently in the pipeline.
    # bpl codes 1-120 -> "US born", 121-998 -> "Foreign born", otherwise NA.
    usborn = case_when(
      bpl %in% 1:120 ~ "US born",
      bpl %in% 121:998 ~ "Foreign born"
    )
  ) %>%
  # No group_by() here: the x aesthetic already splits the data into one box
  # per birthplace group.
  ggplot(aes(x = usborn, y = famsize)) +
  # No constant ymin/ymax: the boxplot statistic derives the whisker ends
  # from the data. NOTE(review): fixed ymin = 1 / ymax = 20 appear to override
  # those computed whiskers; if a fixed axis range was intended, use
  # coord_cartesian(ylim = c(1, 20)) instead.
  geom_boxplot() +
  ggtitle(
    "Boxplot of family size by birthplace of household head",
    "Data from ACS 2015"
  ) +
  xlab("Household head by birthplace") +
  ylab("Number of own family members in household")
## Don't know how to automatically pick scale for object of type labelled. Defaulting to continuous.

4) Estimate the average family size by age of the household head and create an x-y scatter plot of these estimates, using the average family size as the y-axis and the age of the household head as the x-axis.
# Average family size at each age of the household head (relate == 1),
# drawn as a scatter plot with a smoothed trend line on top.
acs %>%
  filter(relate == 1) %>%
  group_by(age) %>%
  # Only the mean is plotted, so it is the only summary computed; the
  # birthplace recode used in the earlier steps is not needed here.
  summarise(avefs = mean(famsize)) %>%
  # Declare the aesthetics once so the points and the smoother share them.
  ggplot(aes(x = age, y = avefs)) +
  geom_point(size = 0.9) +
  ggtitle(
    "Average of family size by age of the household head",
    "Data from ACS 2015"
  ) +
  ylab("Average of family size") +
  # age is numeric, so use a continuous scale; `breaks` puts a labelled tick
  # every 5 years from 15 to 105. The scale name is the axis title.
  scale_x_continuous("Age", breaks = seq(15, 105, by = 5)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess'
