# Visualization 1: Boxplot of number of children by education level
boxplot_education <- ggplot(selected_data, aes(x = factor(education), y = kids)) +
geom_boxplot() +
labs(title = "Distribution of Number of Children by Education Level", x = "Education Level (years)", y = "Number of Children")
print(boxplot_education)

# Visualization 2: Heatmap of education level vs. number of children
heatmap_education_kids <- ggplot(selected_data, aes(x = education, y = kids)) +
geom_bin2d() +
labs(title = "Heatmap of Education Level vs. Number of Children", x = "Education Level (years)", y = "Number of Children") +
scale_fill_gradient(low = "white", high = "blue")
print(heatmap_education_kids)

# Initial regression analysis: Simple regression
model1 <- lm(kids ~ education, data = selected_data)
summary_model1 <- summary(model1)
# Initial regression analysis: Multiple regression
model2 <- lm(kids ~ education + age + agefirstbirth + ethnicity + immigrant + city16, data = selected_data)
summary_model2 <- summary(model2)
# Print regression summaries
print(summary_model1)
##
## Call:
## lm(formula = kids ~ education, data = selected_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8844 -0.9079 -0.2976 0.5804 6.1907
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.128541 0.114268 36.13 <2e-16 ***
## education -0.122064 0.008658 -14.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.398 on 3310 degrees of freedom
## Multiple R-squared: 0.05665, Adjusted R-squared: 0.05637
## F-statistic: 198.8 on 1 and 3310 DF, p-value: < 2.2e-16
print(summary_model2)
##
## Call:
## lm(formula = kids ~ education + age + agefirstbirth + ethnicity +
## immigrant + city16, data = selected_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6909 -0.8334 -0.1666 0.6328 5.5822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.430103 0.152093 22.553 < 2e-16 ***
## education -0.034677 0.008802 -3.940 8.33e-05 ***
## age 0.030171 0.001380 21.864 < 2e-16 ***
## agefirstbirth -0.085818 0.005072 -16.918 < 2e-16 ***
## ethnicityother 0.166511 0.055772 2.986 0.00285 **
## immigrantyes -0.004762 0.070380 -0.068 0.94605
## city16yes 0.002037 0.045907 0.044 0.96461
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.274 on 3305 degrees of freedom
## Multiple R-squared: 0.2175, Adjusted R-squared: 0.2161
## F-statistic: 153.1 on 6 and 3305 DF, p-value: < 2.2e-16
# Create regression table
modelsummary(list("Simple Model" = model1, "Multiple Model" = model2))
tinytable_p6lzcloslpwi1y0ljh5f
| |
Simple Model |
Multiple Model |
| (Intercept) |
4.129 |
3.430 |
| |
(0.114) |
(0.152) |
| education |
-0.122 |
-0.035 |
| |
(0.009) |
(0.009) |
| age |
|
0.030 |
| |
|
(0.001) |
| agefirstbirth |
|
-0.086 |
| |
|
(0.005) |
| ethnicityother |
|
0.167 |
| |
|
(0.056) |
| immigrantyes |
|
-0.005 |
| |
|
(0.070) |
| city16yes |
|
0.002 |
| |
|
(0.046) |
| Num.Obs. |
3312 |
3312 |
| R2 |
0.057 |
0.218 |
| R2 Adj. |
0.056 |
0.216 |
| AIC |
11623.7 |
11014.4 |
| BIC |
11642.0 |
11063.2 |
| Log.Lik. |
-5808.826 |
-5499.198 |
| RMSE |
1.40 |
1.27 |