#PROJECT DESCRIPTION
Production is one of the oldest and most widely studied concepts in economic discourse. Labor, as a key driver of production, has attracted substantial scholarly interest over the last three to four decades, particularly as women's participation in the labor force has increased. This project investigates the effect of having more than two children on maternal labor supply, measured as weeks worked, with a focus on gender equality and labor market dynamics. Using 1980 US Census data, we aim to provide insights into historical patterns that can inform current policies supporting working mothers.
#LOAD LIBRARIES
library(dplyr)        # %>%, select(), rename(), mutate()
library(tidyr)        # drop_na()
library(ggplot2)      # ggplot() and the geoms/themes used in the plots
library(modelsummary) # datasummary_skim(), modelsummary()
#LOAD DATASET
# Read the fertility data from the CSV file into a data frame
Fertility <- read.csv(file = "fertility2.csv")
# Preview the first six rows to sanity-check the import
head(Fertility, n = 6L)
## rownames morekids gender1 gender2 age afam hispanic other work
## 1 1 no female female 35 no no no 40
## 2 2 no female male 33 no no no 52
## 3 3 no female female 28 no no no 12
## 4 4 no male male 28 no no no 0
## 5 5 no female male 31 no no no 0
## 6 6 no female male 33 no no no 0
#CLEAN DATASET
# Keep only the analysis variables and give each a descriptive name.
# select() renames while selecting when written as new_name = old_name, so
# every column not listed here (e.g. `rownames`) is dropped.
Fertility <- select(
  Fertility,
  MoreThanTwoChildren = morekids,
  GenderFirstChild = gender1,
  GenderSecondChild = gender2,
  Age = age,
  AfricanAmerican = afam,
  Hispanic = hispanic,
  OtherEthnicities = other,
  WeeksWorked = work
)
# Turn the categorical columns into factors in a single pass;
# Age and WeeksWorked are left untouched.
Fertility <- mutate(
  Fertility,
  across(
    c(
      MoreThanTwoChildren, GenderFirstChild, GenderSecondChild,
      AfricanAmerican, Hispanic, OtherEthnicities
    ),
    factor
  )
)
# Remove any row that has a missing value in at least one column
Fertility <- drop_na(Fertility)
#DESCRIPTIVE STATISTICS
# Summary table covering the variables of the cleaned data set
Fertility %>%
  datasummary_skim()
| | Unique | Missing Pct. | Mean | SD | Min | Median | Max | Histogram |
|---|---|---|---|---|---|---|---|---|
| Age | 15 | 0 | 30.4 | 3.4 | 21.0 | 31.0 | 35.0 | |
| WeeksWorked | 53 | 0 | 19.2 | 21.9 | 0.0 | 6.0 | 52.0 | |
| | | N | % | | | | | |
| MoreThanTwoChildren | no | 18672 | 62.2 | | | | | |
| | yes | 11328 | 37.8 | | | | | |
| GenderFirstChild | female | 14549 | 48.5 | | | | | |
| | male | 15451 | 51.5 | | | | | |
| GenderSecondChild | female | 14818 | 49.4 | | | | | |
| | male | 15182 | 50.6 | | | | | |
| AfricanAmerican | no | 28402 | 94.7 | | | | | |
| | yes | 1598 | 5.3 | | | | | |
| Hispanic | no | 27768 | 92.6 | | | | | |
| | yes | 2232 | 7.4 | | | | | |
| OtherEthnicities | no | 28295 | 94.3 | | | | | |
| | yes | 1705 | 5.7 | | | | | |
#EXPLORATORY DATA VISUALIZATION
#1. SCATTER PLOT
# Scatter plot of Weeks Worked vs. Age with color by More Than Two Children.
# Age and WeeksWorked are whole numbers (15 and 53 distinct values), so the
# 30,000 observations fall on a small grid of positions. Drawn without jitter,
# the points stack exactly on top of each other and the visible colour only
# reflects drawing order. A small seeded jitter plus a low alpha makes point
# density and group mix visible while keeping the figure reproducible.
# Jitter stays below 0.5 so points never cross into a neighbouring grid cell;
# as a side effect some points are drawn slightly below 0 or above 52 weeks.
ggplot(Fertility, aes(x = Age, y = WeeksWorked, color = MoreThanTwoChildren)) +
  geom_point(
    position = position_jitter(width = 0.3, height = 0.3, seed = 1),
    alpha = 0.2,
    size = 0.7
  ) +
  labs(
    title = "Weeks Worked vs. Age",
    x = "Age",
    y = "Weeks Worked"
  ) +
  theme_minimal()
#2. HISTOGRAM
# Side-by-side (dodged) histogram of weeks worked, one bar colour per
# MoreThanTwoChildren group, using 5-week-wide bins
ggplot(
  data = Fertility,
  mapping = aes(WeeksWorked, fill = MoreThanTwoChildren)
) +
  geom_histogram(position = "dodge", binwidth = 5, alpha = 0.7) +
  ggtitle("Distribution of Weeks Worked by More Than Two Children") +
  xlab("Weeks Worked") +
  ylab("Count") +
  theme_minimal()
#3. BOX PLOT
# Box plots of Weeks Worked by Ethnicity: one plot per indicator column.
# The three plots differ only in the grouping column and its labels, so they
# are built by one local helper.
#   column  - name (string) of the factor column used for the x axis and fill
#   title   - plot title
#   x_label - x-axis label
weeks_worked_boxplot <- function(column, title, x_label) {
  ggplot(
    Fertility,
    aes(x = .data[[column]], y = WeeksWorked, fill = .data[[column]])
  ) +
    geom_boxplot() +
    # fill is labelled with the column name so the legend title stays the same
    # as when the column is mapped by its bare name
    labs(title = title, x = x_label, y = "Weeks Worked", fill = column) +
    theme_minimal()
}

weeks_worked_boxplot(
  "AfricanAmerican",
  title = "Weeks Worked by Ethnicity (African American)",
  x_label = "African American"
)
weeks_worked_boxplot(
  "Hispanic",
  title = "Weeks Worked by Ethnicity (Hispanic)",
  x_label = "Hispanic"
)
weeks_worked_boxplot(
  "OtherEthnicities",
  title = "Weeks Worked by Ethnicity (Other Ethnicities)",
  x_label = "Other Ethnicities"
)
# Weeks worked compared across the gender of the first child
Fertility %>%
  ggplot(aes(GenderFirstChild, WeeksWorked, fill = GenderFirstChild)) +
  geom_boxplot() +
  ggtitle("Weeks Worked by Gender of the First Child") +
  xlab("Gender of the First Child") +
  ylab("Weeks Worked") +
  theme_minimal()
#INITIAL REGRESSION ANALYSIS
# Baseline OLS: weeks worked regressed on the more-than-two-children indicator
model1 <- lm(formula = WeeksWorked ~ MoreThanTwoChildren, data = Fertility)
# Extended OLS: adds the children's genders, age and the ethnicity indicators
model2 <- lm(
  formula = WeeksWorked ~ MoreThanTwoChildren + GenderFirstChild +
    GenderSecondChild + Age + AfricanAmerican + Hispanic + OtherEthnicities,
  data = Fertility
)
# Render both fits side by side as an HTML regression table
list(model1, model2) %>%
  modelsummary(output = "html")
| | (1) | (2) |
|---|---|---|
| (Intercept) | 21.478 | −4.480 |
| | (0.159) | (1.135) |
| MoreThanTwoChildrenyes | −6.008 | −6.898 |
| | (0.259) | (0.258) |
| GenderFirstChildmale | | 0.182 |
| | | (0.247) |
| GenderSecondChildmale | | −0.275 |
| | | (0.247) |
| Age | | 0.842 |
| | | (0.037) |
| AfricanAmericanyes | | 11.532 |
| | | (0.553) |
| Hispanicyes | | −0.252 |
| | | (0.522) |
| OtherEthnicitiesyes | | 3.344 |
| | | (0.589) |
| Num.Obs. | 30000 | 30000 |
| R2 | 0.018 | 0.048 |
| R2 Adj. | 0.018 | 0.048 |
| AIC | 269909.4 | 268987.6 |
| BIC | 269934.3 | 269062.4 |
| Log.Lik. | −134951.681 | −134484.817 |
| F | 538.156 | 214.745 |
| RMSE | 21.75 | 21.41 |
#END