Load IPUMS Data from GitHub
1 Filter data for only Household Heads
# Household heads are the rows where relate equals 1.
HHO <- subset(ipums, subset = relate == 1)
# Store a plain numeric copy of famsize for the summaries and plots below.
HHO$famsizenew <- as.numeric(HHO$famsize)
summary(HHO[["famsizenew"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 2.378 3.000 20.000
2 Create histogram of Family Size
# Base-graphics histogram of the numeric family-size column.
hist(
  x = HHO$famsizenew,
  xlab = "Family Size",
  ylab = "Frequency",
  main = "Family Size Histogram"
)

Create a histogram of Family Size with ggplot2
# Same histogram drawn with ggplot2, using bins one unit of family size wide.
ggplot(data = HHO, mapping = aes(x = famsizenew)) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "Family Size Histogram",
    x = "Family Size",
    y = "Frequency"
  )

3 Estimate the mean and standard deviation of family size for U.S.-born and foreign-born household heads
# Flag heads whose bpl code is at most 120 as U.S. born (1), otherwise 0;
# a missing bpl stays NA.
# NOTE(review): presumably 120 is the highest U.S. birthplace code in the
# IPUMS bpl scheme -- confirm against the codebook.
HHO$USborn <- ifelse(test = HHO$bpl <= 120, yes = 1, no = 0)
# Mean family size within each USborn group.
aggregate(x = HHO$famsizenew, by = list(native = HHO$USborn), FUN = mean)
## native x
## 1 0 2.934445
## 2 1 2.290301
# Standard deviation of family size within each USborn group.
aggregate(x = HHO$famsizenew, by = list(native = HHO$USborn), FUN = sd)
## native x
## 1 0 1.683525
## 2 1 1.332221
4 Create Histogram for U.S. Born
# Family-size histogram restricted to heads flagged as U.S. born (USborn == 1).
ggplot(
  data = subset(HHO, subset = USborn == 1),
  mapping = aes(x = famsizenew)
) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "Family Size Histogram for U.S. Born",
    x = "Family Size",
    y = "Frequency"
  )

4 Create Histogram for Foreign Born
# Family-size histogram restricted to heads flagged as foreign born (USborn == 0).
ggplot(
  data = subset(HHO, subset = USborn == 0),
  mapping = aes(x = famsizenew)
) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "Family Size Histogram for Foreign Born",
    x = "Family Size",
    y = "Frequency"
  )

Create Box and Whisker Plot for Foreign Born vs. U.S. Born
# Side-by-side box plots of family size by nativity of the household head.
# The 0/1 USborn flag is turned into a labelled factor so the x-axis ticks
# read "Foreign Born" / "U.S. Born" instead of bare 0 and 1.
ggplot(HHO) +
  geom_boxplot(
    aes(
      x = factor(
        USborn,
        levels = c(0, 1),
        labels = c("Foreign Born", "U.S. Born")
      ),
      y = famsizenew
    )
  ) +
  ggtitle(label = "Family Size Boxplot for Foreign Born and U.S. Born") +
  ylab(label = "Family Size") +
  xlab(label = "Foreign Born vs. U.S. Born")

Estimate average family size by age with a scatterplot
# Scatterplot of family size against the age of the household head.
# age is converted with as.numeric(), the same way famsize was, so ggplot2
# receives a plain numeric vector instead of a labelled one and no longer
# warns that it cannot pick a scale for type "labelled".
ggplot(HHO) +
  geom_point(mapping = aes(x = as.numeric(age), y = famsizenew)) +
  ggtitle(label = "Scatterplot of Family Size by Age of Household Head") +
  ylab(label = "Family Size") +
  xlab(label = "Age of Household Head")

Estimate average family size by age with a scatterplot and a smoothed trend line
# Scatterplot of family size against the age of the household head, with a
# smoothed trend line drawn over the points.
# The aesthetics are declared once in ggplot() so both layers share them.
# age is converted with as.numeric(), the same way famsize was, so ggplot2
# receives a plain numeric vector instead of a labelled one and no longer
# warns that it cannot pick a scale for type "labelled".
ggplot(HHO, aes(x = as.numeric(age), y = famsizenew)) +
  geom_point() +
  geom_smooth() +
  ggtitle(label = "Scatterplot of Family Size by Age of Household Head") +
  ylab(label = "Family Size") +
  xlab(label = "Age of Household Head")
## `geom_smooth()` using method = 'gam'
