Load IPUMS Data from GitHub

1 Filter data for only Household Heads

# Restrict the IPUMS extract to records whose `relate` code is 1
# (the household heads this analysis is about).
HHO <- subset(x = ipums, subset = relate == 1)

# Store family size as a plain numeric column so it can be summarised
# and plotted directly.
HHO[["famsizenew"]] <- as.numeric(HHO[["famsize"]])

# Five-number summary plus mean of family size among household heads.
summary(HHO[["famsizenew"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   2.378   3.000  20.000

2 Create histogram of Family Size

# Base-graphics histogram of family size among household heads.
hist(
  x = HHO$famsizenew,
  xlab = "Family Size",
  ylab = "Frequency",
  main = "Family Size Histogram"
)

Create a histogram of Family Size with ggplot2

# Same histogram drawn with ggplot2: one bar per unit of family size.
ggplot(data = HHO, mapping = aes(x = famsizenew)) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "Family Size Histogram",
    x = "Family Size",
    y = "Frequency"
  )

3 Estimate the average family size for households whose head is U.S. born versus foreign born

# Flag each household head by birthplace code: 1 when `bpl` is at most 120,
# 0 otherwise.
# NOTE(review): 120 is presumably the top of the U.S. range in the IPUMS
# birthplace coding -- confirm against the codebook for this extract.
HHO[["USborn"]] <- ifelse(test = HHO[["bpl"]] <= 120, yes = 1, no = 0)

# Mean family size within each nativity group (native = 1 is U.S. born).
aggregate(
  x = HHO[["famsizenew"]],
  by = list(native = HHO[["USborn"]]),
  FUN = mean
)
##   native        x
## 1      0 2.934445
## 2      1 2.290301

# Standard deviation of family size within each nativity group.
aggregate(
  x = HHO[["famsizenew"]],
  by = list(native = HHO[["USborn"]]),
  FUN = sd
)
##   native        x
## 1      0 1.683525
## 2      1 1.332221

4 Create Histogram for U.S. Born

# Family-size histogram restricted to household heads flagged USborn == 1.
ggplot(
  data = subset(x = HHO, subset = USborn == 1),
  mapping = aes(x = famsizenew)
) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "Family Size Histogram for U.S. Born",
    x = "Family Size",
    y = "Frequency"
  )

4 Create Histogram for Foreign Born

# Family-size histogram restricted to household heads flagged USborn == 0.
ggplot(
  data = subset(x = HHO, subset = USborn == 0),
  mapping = aes(x = famsizenew)
) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "Family Size Histogram for Foreign Born",
    x = "Family Size",
    y = "Frequency"
  )

Create Box and Whisker Plot for Foreign Born vs. U.S. Born

# Side-by-side boxplots of family size for foreign-born vs. U.S.-born
# household heads.
# The 0/1 USborn flag is turned into a factor with explicit labels so the
# axis ticks name each group themselves; the previous version showed the raw
# codes and relied on padded spaces in the axis title to line up with them,
# which drifts out of alignment whenever the plot is resized.
ggplot(HHO) +
  geom_boxplot(
    aes(
      x = factor(
        USborn,
        levels = c(0, 1),
        labels = c("Foreign Born", "U.S. Born")
      ),
      y = famsizenew
    )
  ) +
  ggtitle(label = "Family Size Boxplot for Foreign Born and U.S. Born") +
  ylab(label = "Family Size") +
  xlab(label = "Birthplace of Household Head")

Estimate Average family size by age with scatterplot

# Scatterplot of family size against the age of the household head.
# `age` is stored as a labelled vector; handing it to ggplot2 unchanged makes
# ggplot2 report "Don't know how to automatically pick scale for object of
# type labelled. Defaulting to continuous." Converting it to plain numeric
# (as is already done for family size) selects the continuous scale
# explicitly and keeps the output free of that message.
ggplot(HHO) +
  geom_point(mapping = aes(x = as.numeric(age), y = famsizenew)) +
  ggtitle(label = "Scatterplot of Family Size by Age of Household Head") +
  ylab(label = "Family Size") +
  xlab(label = "Age of Household Head")

Estimate Average family size by age with scatterplot and smooth line trend

# Scatterplot of family size against the age of the household head, with a
# smoothed trend line on top.
# `age` is stored as a labelled vector; handing it to ggplot2 unchanged makes
# ggplot2 report "Don't know how to automatically pick scale for object of
# type labelled. Defaulting to continuous." It is converted to plain numeric
# once, in the plot-level mapping, so the point and smooth layers share the
# same aesthetics instead of repeating them.
ggplot(HHO, mapping = aes(x = as.numeric(age), y = famsizenew)) +
  geom_point() +
  geom_smooth() +
  ggtitle(label = "Scatterplot of Family Size by Age of Household Head") +
  ylab(label = "Family Size") +
  xlab(label = "Age of Household Head")
## `geom_smooth()` using method = 'gam'