#PROJECT DESCRIPTION

Production is one of the oldest and most intriguing concepts in modern discourse. Labor, as the key driver of production, has attracted substantial scholarly interest over the last three to four decades, particularly with the increasing participation of women. This project investigates the effect of having more than two children on maternal labor supply, focusing on gender equality and labor market dynamics. By analyzing 1980 US Census data, we aim to provide insights into historical trends that can inform current policies supporting working mothers.

#LOAD LIBRARIES

library(dplyr)        # %>%, select(), rename(), mutate()
library(tidyr)        # drop_na()
library(ggplot2)      # ggplot() and the geoms/themes used in the plots
library(modelsummary) # datasummary_skim(), modelsummary()

#LOAD DATASET

# Read the fertility extract (a CSV resolved against the working directory)
# into a data frame
Fertility <- read.csv(file = "fertility2.csv")

# Preview the first six rows to confirm the columns were read as expected
head(Fertility, n = 6L)
##   rownames morekids gender1 gender2 age afam hispanic other work
## 1        1       no  female  female  35   no       no    no   40
## 2        2       no  female    male  33   no       no    no   52
## 3        3       no  female  female  28   no       no    no   12
## 4        4       no    male    male  28   no       no    no    0
## 5        5       no  female    male  31   no       no    no    0
## 6        6       no  female    male  33   no       no    no    0

#CLEAN DATASET

# Keep only the analysis variables and give each a descriptive name in the
# same step: select() renames a column when it is passed as new_name = old_name
Fertility <- select(
  Fertility,
  MoreThanTwoChildren = morekids,
  GenderFirstChild = gender1,
  GenderSecondChild = gender2,
  Age = age,
  AfricanAmerican = afam,
  Hispanic = hispanic,
  OtherEthnicities = other,
  WeeksWorked = work
)

# Turn every categorical column into a factor in one pass; Age and WeeksWorked
# are left untouched
Fertility <- Fertility %>%
  mutate(
    across(
      c(
        MoreThanTwoChildren,
        GenderFirstChild,
        GenderSecondChild,
        AfricanAmerican,
        Hispanic,
        OtherEthnicities
      ),
      factor
    )
  )

# Discard any row that has a missing value in at least one column
Fertility <- drop_na(Fertility)

#DESCRIPTIVE STATISTICS

# Summary table of every column in the cleaned data set
datasummary_skim(data = Fertility)
##                      Unique  Missing Pct.  Mean    SD   Min  Median   Max  Histogram
## Age                      15             0  30.4   3.4  21.0    31.0  35.0
## WeeksWorked              53             0  19.2  21.9   0.0     6.0  52.0
##
##                                  N     %
## MoreThanTwoChildren  no      18672  62.2
##                      yes     11328  37.8
## GenderFirstChild     female  14549  48.5
##                      male    15451  51.5
## GenderSecondChild    female  14818  49.4
##                      male    15182  50.6
## AfricanAmerican      no      28402  94.7
##                      yes      1598   5.3
## Hispanic             no      27768  92.6
##                      yes      2232   7.4
## OtherEthnicities     no      28295  94.3
##                      yes      1705   5.7

#EXPLORATORY DATA VISUALIZATION

#1. SCATTER PLOT

# Scatter plot of Weeks Worked vs. Age with color by More Than Two Children.
# Age and WeeksWorked take whole-number values and the data set has 30,000
# rows, so plain points pile up on the same coordinates and the group drawn
# last hides the other. A small jitter (fixed seed, so the figure is
# reproducible) combined with a low alpha makes the density of each group
# visible.
ggplot(Fertility, aes(x = Age, y = WeeksWorked, color = MoreThanTwoChildren)) +
  geom_point(
    alpha = 0.15,
    size = 0.6,
    position = position_jitter(width = 0.35, height = 0.35, seed = 1)
  ) +
  # Draw the legend keys opaque and larger so the colors stay readable
  guides(color = guide_legend(override.aes = list(alpha = 1, size = 2))) +
  labs(title = "Weeks Worked vs. Age",
       x = "Age",
       y = "Weeks Worked",
       color = "More Than Two Children") +
  theme_minimal()

#2. HISTOGRAM

# Side-by-side histograms (5-week bins) of weeks worked, one bar series per
# level of MoreThanTwoChildren
ggplot(
  data = Fertility,
  mapping = aes(x = WeeksWorked, fill = MoreThanTwoChildren)
) +
  geom_histogram(position = "dodge", binwidth = 5, alpha = 0.7) +
  ggtitle("Distribution of Weeks Worked by More Than Two Children") +
  xlab("Weeks Worked") +
  ylab("Count") +
  theme_minimal()

#3. BOX PLOT

# Weeks worked compared across each ethnicity indicator, one box plot per
# indicator (African American, Hispanic, other ethnicities)
Fertility %>%
  ggplot(aes(x = AfricanAmerican, y = WeeksWorked, fill = AfricanAmerican)) +
  geom_boxplot() +
  ggtitle("Weeks Worked by Ethnicity (African American)") +
  xlab("African American") +
  ylab("Weeks Worked") +
  theme_minimal()

Fertility %>%
  ggplot(aes(x = Hispanic, y = WeeksWorked, fill = Hispanic)) +
  geom_boxplot() +
  ggtitle("Weeks Worked by Ethnicity (Hispanic)") +
  xlab("Hispanic") +
  ylab("Weeks Worked") +
  theme_minimal()

Fertility %>%
  ggplot(aes(x = OtherEthnicities, y = WeeksWorked, fill = OtherEthnicities)) +
  geom_boxplot() +
  ggtitle("Weeks Worked by Ethnicity (Other Ethnicities)") +
  xlab("Other Ethnicities") +
  ylab("Weeks Worked") +
  theme_minimal()

# Weeks worked compared by the gender of the mother's first child
Fertility %>%
  ggplot(aes(x = GenderFirstChild, y = WeeksWorked, fill = GenderFirstChild)) +
  geom_boxplot() +
  ggtitle("Weeks Worked by Gender of the First Child") +
  xlab("Gender of the First Child") +
  ylab("Weeks Worked") +
  theme_minimal()

#INITIAL REGRESSION ANALYSIS

# Model 1: weeks worked regressed on the more-than-two-children indicator only
model1 <- lm(
  formula = WeeksWorked ~ MoreThanTwoChildren,
  data = Fertility
)

# Model 2: same outcome, adding the children's genders, the mother's age and
# the ethnicity indicators as controls
model2 <- lm(
  formula = WeeksWorked ~ MoreThanTwoChildren +
    GenderFirstChild + GenderSecondChild +
    Age +
    AfricanAmerican + Hispanic + OtherEthnicities,
  data = Fertility
)

# Render both fits side by side as an HTML regression table
modelsummary(models = list(model1, model2), output = "html")
##                                  (1)           (2)
## (Intercept)                   21.478        −4.480
##                              (0.159)       (1.135)
## MoreThanTwoChildrenyes        −6.008        −6.898
##                              (0.259)       (0.258)
## GenderFirstChildmale                         0.182
##                                            (0.247)
## GenderSecondChildmale                       −0.275
##                                            (0.247)
## Age                                          0.842
##                                            (0.037)
## AfricanAmericanyes                          11.532
##                                            (0.553)
## Hispanicyes                                 −0.252
##                                            (0.522)
## OtherEthnicitiesyes                          3.344
##                                            (0.589)
## Num.Obs.                       30000         30000
## R2                             0.018         0.048
## R2 Adj.                        0.018         0.048
## AIC                         269909.4      268987.6
## BIC                         269934.3      269062.4
## Log.Lik.                 −134951.681   −134484.817
## F                            538.156       214.745
## RMSE                           21.75         21.41

#END