# Read in the data
ddi <- read_ipums_ddi("/Users/xiaoling/Desktop/ecp3203/Assignments /Assignment_4/cps_00010.xml")
data <- read_ipums_micro(ddi)
## Use of data from IPUMS CPS is subject to conditions including that users should cite the data appropriately. Use command `ipums_conditions()` for more details.
# Change column names to all lowercase
colnames(data) <- tolower(colnames(data))
# Sample to use in the analysis
# Filter the data using filter() function
filtered_data <- data %>%
filter(age >= 21 & age <= 64, # Workers aged 21-64
incwage > 0 & incwage < 99999998, # Having valid values for annual wage and salary income
wkswork2 > 1 & wkswork2 < 6, # Weeks worked greater than 1 and less than 6
asecwt >= 0) # Exclude persons who have a negative sampling weight
# Create variables
filtered_data <- filtered_data %>%
mutate(
schooling = case_when (educ == 20 ~ 5.5,
TRUE ~ educ), # Code 20 corresponds to 5 or 6 years of schooling
weeks_worked = case_when (wkswork2 == 4 ~ 43.5,
TRUE ~ wkswork2), # Code 4 represents 40-47 weeks worked
lwkearnings = log(incwage / weeks_worked), # Log of weekly earnings
experience = age - schooling - 6 # Years of labor market experience
)
# Statistical analysis
# 1.Estimate the Mincer earning function separately for men and women.
library(broom)
# Create a squared experience variable
filtered_data <- filtered_data %>%
mutate(
experience_squared = experience^2
)
# Separate the data for men and women
data_men <- filtered_data %>%
filter(sex == 1)
data_women <- filtered_data %>%
filter(sex == 2)
# Fit the model for men
model_men <- lm(lwkearnings ~ schooling + experience + experience_squared, data = data_men)
summary_men <- summary(model_men)
tidy_men <- tidy(model_men)
# Fit the model for women
model_women <- lm(lwkearnings ~ schooling + experience + experience_squared, data = data_women)
summary_women <- summary(model_women)
tidy_women <- tidy(model_women)
# Print the results
print("Regression results for men:")
## [1] "Regression results for men:"
print(summary_men)
##
## Call:
## lm(formula = lwkearnings ~ schooling + experience + experience_squared,
## data = data_men)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.9009 -1.0670 0.2252 1.0912 19.5114
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.993e+00 1.029e-02 582.46 <2e-16 ***
## schooling 2.848e-02 2.336e-04 121.92 <2e-16 ***
## experience 1.567e-02 2.199e-04 71.24 <2e-16 ***
## experience_squared -3.227e-05 1.249e-06 -25.83 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.455 on 327773 degrees of freedom
## Multiple R-squared: 0.05422, Adjusted R-squared: 0.05421
## F-statistic: 6264 on 3 and 327773 DF, p-value: < 2.2e-16
print(tidy_men)
## # A tibble: 4 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 5.99 0.0103 582. 0
## 2 schooling 0.0285 0.000234 122. 0
## 3 experience 0.0157 0.000220 71.2 0
## 4 experience_squared -0.0000323 0.00000125 -25.8 5.39e-147
print("Regression results for women:")
## [1] "Regression results for women:"
print(summary_women)
##
## Call:
## lm(formula = lwkearnings ~ schooling + experience + experience_squared,
## data = data_women)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.3494 -0.9933 0.1572 1.0838 5.8929
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.632e+00 1.040e-02 541.662 < 2e-16 ***
## schooling 1.939e-02 2.201e-04 88.065 < 2e-16 ***
## experience 6.797e-03 2.477e-04 27.444 < 2e-16 ***
## experience_squared 1.523e-05 1.958e-06 7.778 7.4e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.464 on 407453 degrees of freedom
## Multiple R-squared: 0.05423, Adjusted R-squared: 0.05422
## F-statistic: 7788 on 3 and 407453 DF, p-value: < 2.2e-16
print(tidy_women)
## # A tibble: 4 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 5.63 0.0104 542. 0
## 2 schooling 0.0194 0.000220 88.1 0
## 3 experience 0.00680 0.000248 27.4 1.15e-165
## 4 experience_squared 0.0000152 0.00000196 7.78 7.40e- 15
# 2. Retrieve the coefficient of the education variable as the rate of return to schooling
# for men and women
# For men
schooling_coef_men <- coef(model_men)["schooling"]
# Convert log points to percentage
percentage_change_men <- (exp(schooling_coef_men) - 1) * 100
# For women
schooling_coef_women <- coef(model_women)["schooling"]
# Convert log points to percentage
percentage_change_women <- (exp(schooling_coef_women) - 1) * 100
# Print the report
cat("
# Report on the Rate of Return to Schooling for Men and Women
This report presents the results of an analysis on the rate of return to schooling for men and women. The analysis was conducted using a Mincer earnings function, with log weekly earnings as the dependent variable and schooling, experience, and experience squared as the independent variables.
## Results
The rate of return to schooling was calculated for both men and women. The results are as follows:
### Men
The coefficient of the schooling variable for men is ", schooling_coef_men, ". This represents the rate of return to schooling for men. When converted to a percentage, the rate of return to schooling for men is approximately ", round(percentage_change_men, 2), "%.
### Women
The coefficient of the schooling variable for women is ", schooling_coef_women, ". This represents the rate of return to schooling for women. When converted to a percentage, the rate of return to schooling for women is approximately ", round(percentage_change_women, 2), "%.
## Conclusion
These results provide valuable insights into the rate of return to schooling for men and women. Further analysis could explore the factors contributing to these rates and how they change over time.
")
##
## # Report on the Rate of Return to Schooling for Men and Women
##
## This report presents the results of an analysis on the rate of return to schooling for men and women. The analysis was conducted using a Mincer earnings function, with log weekly earnings as the dependent variable and schooling, experience, and experience squared as the independent variables.
##
## ## Results
##
## The rate of return to schooling was calculated for both men and women. The results are as follows:
##
## ### Men
##
## The coefficient of the schooling variable for men is 0.02848117 . This represents the rate of return to schooling for men. When converted to a percentage, the rate of return to schooling for men is approximately 2.89 %.
##
## ### Women
##
## The coefficient of the schooling variable for women is 0.01938579 . This represents the rate of return to schooling for women. When converted to a percentage, the rate of return to schooling for women is approximately 1.96 %.
##
## ## Conclusion
##
## These results provide valuable insights into the rate of return to schooling for men and women. Further analysis could explore the factors contributing to these rates and how they change over time.
# 3. Calculate the slope of the age-earnings profile at 10 years of experience for men and women.
# For men
experience_coef_men <- coef(model_men)["experience"]
experience_squared_coef_men <- coef(model_men)["experience_squared"]
slope_men <- experience_coef_men + 2 * 10 * experience_squared_coef_men
# For women
experience_coef_women <- coef(model_women)["experience"]
experience_squared_coef_women <- coef(model_women)["experience_squared"]
slope_women <- experience_coef_women + 2 * 10 * experience_squared_coef_women
# Print the results
cat("The slope of the age-earnings profile at 10 years of experience is:\n")
## The slope of the age-earnings profile at 10 years of experience is:
cat("\nFor men: ", round(slope_men, 6), "\n")
##
## For men: 0.015022
cat("This means that, for men, each additional year of experience beyond 10 years is associated with a", round(slope_men * 100, 2), "% increase in weekly earnings, holding all other variables constant.\n")
## This means that, for men, each additional year of experience beyond 10 years is associated with a 1.5 % increase in weekly earnings, holding all other variables constant.
cat("\nFor women: ", round(slope_women, 6), "\n")
##
## For women: 0.007101
cat("This means that, for women, each additional year of experience beyond 10 years is associated with a", round(slope_women * 100, 2), "% increase in weekly earnings, holding all other variables constant.\n")
## This means that, for women, each additional year of experience beyond 10 years is associated with a 0.71 % increase in weekly earnings, holding all other variables constant.
# 4. Produce a diagram illustrating the trends in the rate of return to schooling.
# Initialize an empty data frame to store the results
results <- data.frame()
# Get the unique years in your data
years <- unique(filtered_data$year)
# Loop over the years
for (year in years) {
# Filter the data for the current year
data_year <- filtered_data %>%
filter(year == !!year)
# Loop over the genders
for (gender in c(1, 2)) {
# Filter the data for the current gender
data_gender <- data_year %>%
filter(sex == !!gender)
# Fit the model
model <- lm(lwkearnings ~ schooling + experience + experience_squared, data = data_gender)
# Extract the schooling coefficient
schooling_coef <- coef(model)["schooling"]
# Store the results
results <- rbind(results, data.frame(year = year, gender = gender, schooling_coef = schooling_coef))
}
}
# Convert the gender variable to a factor
results$gender <- factor(results$gender, levels = c(1, 2), labels = c("Men", "Women"))
# Plot the results
ggplot(results, aes(x = year, y = schooling_coef, color = gender)) +
geom_line() +
labs(x = "Year of the Survey", y = "Rate of Return to Schooling", color = "Gender") +
theme_minimal()

# Initialize an empty data frame to store the results
results_exp <- data.frame()
# Loop over the years
for (year in years) {
# Filter the data for the current year
data_year <- filtered_data %>%
filter(year == !!year)
# Loop over the genders
for (gender in c(1, 2)) {
# Filter the data for the current gender
data_gender <- data_year %>%
filter(sex == !!gender)
# Fit the model
model <- lm(lwkearnings ~ schooling + experience + experience_squared, data = data_gender)
# Extract the experience coefficient
experience_coef <- coef(model)["experience"]
# Store the results
results_exp <- rbind(results_exp, data.frame(year = year, gender = gender, experience_coef = experience_coef))
}
}
# Convert the gender variable to a factor
results_exp$gender <- factor(results_exp$gender, levels = c(1, 2), labels = c("Men", "Women"))
# Plot the results
ggplot(results_exp, aes(x = year, y = experience_coef, color = gender)) +
geom_line() +
labs(x = "Year of the Survey", y = "Rate of Return to Experience", color = "Gender") +
theme_minimal()
