library(modelsummary)
library(tidyverse)
library(sjPlot)
# Read the wage data from disk. The rest of the script uses the columns
# wage, educ, nonwhite, female and married of this data frame.
wage <- read_csv("E:/hw/wage.csv")
# Print per-column summary statistics as a quick check of the import.
summary(wage)
##       wage            educ           exper         nonwhite
##  Min.   : 5.53   Min.   : 4.00   Min.   : 1.00   Length:100
##  1st Qu.: 8.60   1st Qu.:12.00   1st Qu.: 5.00   Class :character
##  Median :10.81   Median :12.00   Median :13.00   Mode  :character
##  Mean   :11.85   Mean   :12.82   Mean   :15.03
##  3rd Qu.:13.64   3rd Qu.:14.00   3rd Qu.:22.00
##  Max.   :27.20   Max.   :18.00   Max.   :45.00
##     female            married
##  Length:100         Length:100
##  Class :character   Class :character
##  Mode  :character   Mode  :character
##
##
##
#datasummary_skim(wage)
# Histogram of the wage variable.
ggplot(data = wage, mapping = aes(x = wage)) +
  # binwidth sets the bin width and alpha the fill opacity; try other values
  # for either one to see how the histogram changes.
  geom_histogram(binwidth = 2, fill = "blue", color = "black", alpha = 0.7) +
  theme_minimal() + # minimal theme for a clean look
  ggtitle("Histogram of wage") +
  xlab("wage") +
  ylab("Count")

# Scatter plot of wage (vertical axis) against educ (horizontal axis).
plot(
  wage$educ, wage$wage,
  xlim = c(3, 20),
  pch = 16, cex = 0.3,
  xlab = "years of education", ylab = "hourly wage"
)
# Annotate every point with its row number (the labels text() falls back to
# when none are given), placed to the right of the point (pos = 4).
text(wage$educ, wage$wage, labels = seq_len(nrow(wage)), pos = 4, cex = 0.4)
# Fit the simple regression of wage on educ and overlay its fitted line.
m1 <- lm(wage ~ educ, data = wage)
abline(reg = m1, col = "red")

# Recode the 'Y'/'N' indicator columns into labelled factors. model1 uses
# none of these columns, so recoding before fitting it changes nothing.
wage$female <- car::recode(
  wage$female, "'Y'='female'; 'N'='male'",
  as.factor = TRUE
)
wage$nonwhite <- car::recode(
  wage$nonwhite, "'Y'='nonwhite'; 'N'='white'",
  as.factor = TRUE
)
wage$married <- car::recode(
  wage$married, "'Y'='married'; 'N'='unmarried'",
  as.factor = TRUE
)

# Four OLS specifications, each adding terms to the previous one:
#   model1: wage on educ
#   model2: adds female
#   model3: adds the educ-by-female interaction
#   model4: adds nonwhite and married
model1 <- lm(wage ~ educ, data = wage)
model2 <- lm(wage ~ educ + female, data = wage)
model3 <- lm(wage ~ educ + female + educ:female, data = wage)
model4 <- lm(
  wage ~ educ + female + educ:female + nonwhite + married,
  data = wage
)

# Side-by-side regression table of all four models (sjPlot).
tab_model(model1, model2, model3, model4)
## |                      | wage                               | wage                               | wage                               | wage                               |
## | Predictors           | Estimates | CI            | p      | Estimates | CI            | p      | Estimates | CI            | p      | Estimates | CI            | p      |
## |----------------------|-----------|---------------|--------|-----------|---------------|--------|-----------|---------------|--------|-----------|---------------|--------|
## | (Intercept)          | 3.19      | -1.18 – 7.56  | 0.151  | 2.57      | -1.55 – 6.69  | 0.219  | 0.25      | -6.86 – 7.36  | 0.945  | 1.20      | -6.17 – 8.57  | 0.747  |
## | educ                 | 0.68      | 0.34 – 1.01   | <0.001 | 0.62      | 0.30 – 0.93   | <0.001 | 0.80      | 0.24 – 1.36   | 0.005  | 0.86      | 0.31 – 1.42   | 0.003  |
## | female [male]        |           |               |        | 2.83      | 1.32 – 4.35   | <0.001 | 6.30      | -2.48 – 15.09 | 0.158  | 6.74      | -1.97 – 15.46 | 0.128  |
## | educ * female [male] |           |               |        |           |               |        | -0.27     | -0.95 – 0.41  | 0.428  | -0.33     | -1.01 – 0.34  | 0.331  |
## | nonwhite [white]     |           |               |        |           |               |        |           |               |        | -1.02     | -3.43 – 1.39  | 0.404  |
## | married [unmarried]  |           |               |        |           |               |        |           |               |        | -1.43     | -3.00 – 0.15  | 0.075  |
## | Observations         | 100                                | 100                                | 100                                | 100                                |
## | R2 / R2 adjusted     | 0.140 / 0.132                      | 0.247 / 0.232                      | 0.252 / 0.229                      | 0.285 / 0.247                      |