Packages

install.packages("tidyverse", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp2dV7l4/downloaded_packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
install.packages("psych", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp2dV7l4/downloaded_packages
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
install.packages("knitr",repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp2dV7l4/downloaded_packages
library("knitr")
install.packages("kableExtra",repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp2dV7l4/downloaded_packages
library("kableExtra")
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
install.packages("robustbase",repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp2dV7l4/downloaded_packages
library(robustbase)
install.packages("car",repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp2dV7l4/downloaded_packages
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:psych':
## 
##     logit
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
install.packages("huxtable", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp2dV7l4/downloaded_packages
library("huxtable")
## 
## Attaching package: 'huxtable'
## 
## The following object is masked from 'package:kableExtra':
## 
##     add_footnote
## 
## The following object is masked from 'package:dplyr':
## 
##     add_rownames
## 
## The following object is masked from 'package:ggplot2':
## 
##     theme_grey
install.packages("modelsummary", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp2dV7l4/downloaded_packages
library("modelsummary")
## 
## Attaching package: 'modelsummary'
## 
## The following object is masked from 'package:psych':
## 
##     SD
install.packages("stargazer",repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp2dV7l4/downloaded_packages
library("stargazer")
## 
## Please cite as: 
## 
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

DATA

df <- read_csv("cps_2008.csv")
## Rows: 4733 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): wage, educ, age, exper, female, black, white, married, union, nort...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
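As the message suggests, the column types can be declared explicitly or the message silenced on re-runs. A minimal sketch, not part of the original run:

# Sketch: suppress the column-specification message on subsequent reads.
df <- read_csv("cps_2008.csv", show_col_types = FALSE)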

Create a table of descriptive statistics

des1 <- describe(df)
des1
          vars    n   mean     sd median trimmed   mad  min  max range    skew kurtosis      se
wage         1 4733 10.2     6.21   8.53  9.27    4.83 1.05 78.7  77.7  2.11       9.46  0.0903
educ         2 4733 13.3     2.36  13    13.2     1.48 1    18    17   -0.0879     1.42  0.0342
age          3 4733 38.3    11.3   38    38.1    13.3  18   64    46    0.17      -0.804 0.164
exper        4 4733 19      11.4   19    18.6    13.3  0    52    52    0.269     -0.727 0.166
female       5 4733  0.485   0.5    0     0.481   0    0     1     1    0.0596    -2     0.00727
black        6 4733  0.0987  0.298  0     0       0    0     1     1    2.69       5.24  0.00434
white        7 4733  0.901   0.298  1     1       0    0     1     1   -2.69       5.24  0.00434
married      8 4733  0.604   0.489  1     0.631   0    0     1     1   -0.427     -1.82  0.00711
union        9 4733  0.163   0.37   0     0.0792  0    0     1     1    1.82       1.32  0.00537
northeast   10 4733  0.222   0.416  0     0.153   0    0     1     1    1.34      -0.216 0.00604
midwest     11 4733  0.245   0.43   0     0.181   0    0     1     1    1.19      -0.59  0.00625
south       12 4733  0.31    0.463  0     0.263   0    0     1     1    0.819     -1.33  0.00673
west        13 4733  0.223   0.416  0     0.153   0    0     1     1    1.33      -0.224 0.00605
fulltime    14 4733  0.881   0.323  1     0.977   0    0     1     1   -2.36       3.57  0.0047
metro       15 4733  0.794   0.405  1     0.867   0    0     1     1   -1.45       0.103 0.00588

To do: ask the professor which variables belong in the table (an illustrative subsetting sketch follows the table below).

tableF <- knitr::kable(des1,
                       "html",
                       caption = "CPS 2008 DATA",
                       digits = 2) %>%
  footnote(general = "N = 4733") %>%
  kable_styling(font_size = 10)
tableF
CPS 2008 DATA
vars n mean sd median trimmed mad min max range skew kurtosis se
wage 1 4733 10.19 6.21 8.53 9.27 4.83 1.05 78.71 77.66 2.11 9.46 0.09
educ 2 4733 13.30 2.36 13.00 13.25 1.48 1.00 18.00 17.00 -0.09 1.42 0.03
age 3 4733 38.33 11.30 38.00 38.07 13.34 18.00 64.00 46.00 0.17 -0.80 0.16
exper 4 4733 19.04 11.40 19.00 18.61 13.34 0.00 52.00 52.00 0.27 -0.73 0.17
female 5 4733 0.49 0.50 0.00 0.48 0.00 0.00 1.00 1.00 0.06 -2.00 0.01
black 6 4733 0.10 0.30 0.00 0.00 0.00 0.00 1.00 1.00 2.69 5.24 0.00
white 7 4733 0.90 0.30 1.00 1.00 0.00 0.00 1.00 1.00 -2.69 5.24 0.00
married 8 4733 0.60 0.49 1.00 0.63 0.00 0.00 1.00 1.00 -0.43 -1.82 0.01
union 9 4733 0.16 0.37 0.00 0.08 0.00 0.00 1.00 1.00 1.82 1.32 0.01
northeast 10 4733 0.22 0.42 0.00 0.15 0.00 0.00 1.00 1.00 1.34 -0.22 0.01
midwest 11 4733 0.24 0.43 0.00 0.18 0.00 0.00 1.00 1.00 1.19 -0.59 0.01
south 12 4733 0.31 0.46 0.00 0.26 0.00 0.00 1.00 1.00 0.82 -1.33 0.01
west 13 4733 0.22 0.42 0.00 0.15 0.00 0.00 1.00 1.00 1.33 -0.22 0.01
fulltime 14 4733 0.88 0.32 1.00 0.98 0.00 0.00 1.00 1.00 -2.36 3.57 0.00
metro 15 4733 0.79 0.40 1.00 0.87 0.00 0.00 1.00 1.00 -1.45 0.10 0.01
Note:
N = 4733
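If only a few variables end up in the final report, one option is to subset before describing. The variable names below are an illustrative pick, not the professor's answer:

# Sketch: restrict the descriptive table to a handful of key variables.
key_vars <- c("wage", "educ", "exper", "female", "union")
knitr::kable(describe(df[, key_vars]),
             "html",
             caption = "CPS 2008 DATA (selected variables)",
             digits = 2) %>%
  footnote(general = "N = 4733") %>%
  kable_styling(font_size = 10)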

Regress wage on educ with a robust MM regression (lmrob)

set.seed(77)
lmro <- lmrob(df$wage ~ df$educ)
summary(lmro)
## 
## Call:
## lmrob(formula = df$wage ~ df$educ)
##  \--> method = "MM"
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.1965  -2.8792  -0.2992   3.2908  65.1735 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.95530    0.45769  -6.457 1.18e-10 ***
## df$educ      0.91621    0.03672  24.949  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Robust residual standard error: 4.04 
## Multiple R-squared:  0.1963, Adjusted R-squared:  0.1962 
## Convergence in 16 IRWLS iterations
## 
## Robustness weights: 
##  62 observations c(4629,4630,4637,4643,4648,4656,4660,4664,4671,4674,4675,4677,4678,4682,4683,4684,4685,4687,4689,4690,4691,4693,4694,4695,4696,4697,4698,4699,4700,4701,4702,4703,4704,4705,4706,4707,4708,4709,4710,4711,4712,4713,4714,4715,4716,4717,4718,4719,4720,4721,4722,4723,4724,4725,4726,4727,4728,4729,4730,4731,4732,4733)
##   are outliers with |weight| = 0 ( < 2.1e-05); 
##  318 weights are ~= 1. The remaining 4353 ones are summarized as
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000354 0.8702000 0.9440000 0.8806000 0.9831000 0.9990000 
## Algorithmic parameters: 
##        tuning.chi                bb        tuning.psi        refine.tol 
##         1.548e+00         5.000e-01         4.685e+00         1.000e-07 
##           rel.tol         scale.tol         solve.tol       eps.outlier 
##         1.000e-07         1.000e-10         1.000e-07         2.113e-05 
##             eps.x warn.limit.reject warn.limit.meanrw 
##         3.274e-11         5.000e-01         5.000e-01 
##      nResample         max.it       best.r.s       k.fast.s          k.max 
##            500             50              2              1            200 
##    maxit.scale      trace.lev            mts     compute.rd fast.s.large.n 
##            200              0           1000              0           2000 
##                   psi           subsampling                   cov 
##            "bisquare"         "nonsingular"         ".vcov.avar1" 
## compute.outlier.stats 
##                  "SM" 
## seed : int(0)
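For comparison, the same model can be fit by ordinary least squares; the gap between the OLS and MM-robust slopes shows how much the high-wage outliers matter. This is a sketch, not part of the original analysis (ols is an illustrative name):

# Sketch: plain OLS fit of wage on educ, for comparison with the robust fit.
ols <- lm(wage ~ educ, data = df)
summary(ols)

# Put the two sets of coefficients side by side.
cbind(OLS = coef(ols), MM = coef(lmro))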

Create a scatter plot with the fitted regression line

plot(x = df$educ,
     y = df$wage,
     main = "Wage on Education",
     xlab = "Years of Education",
     ylab = "Earnings per hour",
     pch = 19,
     cex = 0.5,
     sub = "N = 4733",
     frame = TRUE,
     col = "pink")

abline(lmro, col = "blue")
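Since ggplot2 is already attached, an equivalent figure can be drawn with it; note that geom_smooth(method = "lm") overlays an OLS line rather than the lmrob fit. A sketch:

# Sketch: ggplot2 version of the scatter plot with an OLS line overlaid.
ggplot(df, aes(x = educ, y = wage)) +
  geom_point(colour = "pink", size = 0.5) +
  geom_smooth(method = "lm", se = FALSE, colour = "blue") +
  labs(title = "Wage on Education", subtitle = "N = 4733",
       x = "Years of Education", y = "Earnings per hour")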

Create a table for the regression

stargazer(lmro,
          title = "Wage vs Education",
          dep.var.caption = "DV: Earnings per Hour",
          covariate.labels = c("Years of education"),
          notes.label = "Significance level:",
          keep.stat = c("n","rsq"),
          digits = 2,
          type = "html",
          out = "Economretics3.html")
## 
## <table style="text-align:center"><caption><strong>Wage vs Education</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td>DV: Earnings per Hour</td></tr>
## <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td>wage</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Years of education</td><td>0.92<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.04)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td style="text-align:left">Constant</td><td>-2.96<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.46)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left">Observations</td><td>4,733</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.20</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Significance level:</td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
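If extra rows are wanted in a stargazer table, add.lines expects a list of character vectors, one vector per row. A sketch of that usage (the row label is illustrative):

# Sketch: add.lines takes a list of character vectors, one per extra table row.
stargazer(lmro,
          title = "Wage vs Education",
          dep.var.caption = "DV: Earnings per Hour",
          covariate.labels = c("Years of education"),
          add.lines = list(c("Estimator", "MM-robust")),
          keep.stat = c("n", "rsq"),
          digits = 2,
          type = "html")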

Trying the car package

scatterplot(df$wage ~ df$educ,
            xlab = "Years of Education",
            ylab = "Earnings per Hour",
            main = "Wage vs Years of Education",
            grid = FALSE)

# Creating the four scatterplots

# Plot 1: Wage vs Educ

plot1 <- plot(x = df$educ,
     y = df$wage,
     main = "Wage vs Education",
     xlab = "Years of Education",
     ylab = "Earnings per hour",
     pch = 19,
     cex = 0.5,
     frame = TRUE,
     sub = "N = 4733",
     col = "pink")

# Plot 2: Wage vs ln(Educ)

plot2 <- plot(x = log(df$educ),
     y = df$wage,
     main = "Wage vs ln(Education)",
     xlab = "ln(Years of Education)",
     ylab = "Earnings per hour",
     pch = 19,
     cex = 0.5,
     frame = TRUE,
     sub = " N = 4733",
     col = "pink")

# Plot 3: ln(Wage) vs Educ

plot3 <- plot(x = df$educ,
     y = log(df$wage),
     main = "Ln(Wage) vs Education",
     xlab = "Years of Education",
     ylab = "Ln(Earnings per hour)",
     pch = 19,
     cex = 0.5,
     frame = TRUE,
     sub = "N = 4733",
     col = "pink")

# Plot 4: ln(Wage) vs ln(Educ)

graph1 <- plot(x = log(df$educ),
     y = log(df$wage),
     main = "ln(Wage) vs ln(Education)",
     xlab = "ln(Years of Education)",
     ylab = "Ln(Earnings per hour)",
     pch = 19,
     cex = 0.5,
     frame = TRUE,
     sub = "N = 4733",
     col = "pink")

# Creating a single plot for the 4 graphs

We could add a fitted line to each of the four plots with abline(); note that lmro was fit on the untransformed wage ~ educ scale, so each panel needs a line fit on its own scale (see the sketch after the combined figure below).

par(mfrow=c(2,2))
plot(x = df$educ,
     y = df$wage,
     main = "Wage vs Education",
     xlab = "Years of Education",
     ylab = "Earnings per hour",
     sub = "N = 4733",
     col = "pink")
plot(x = log(df$educ),
     y = df$wage,
     main = "Wage vs ln(Education)",
     xlab = "ln(Years of Education)",
     ylab = "Earnings per hour",
     sub = " N = 4733",
     col = "pink")
plot(x = df$educ,
     y = log(df$wage),
     main = "Ln(Wage) vs Education",
     xlab = "Years of Education",
     ylab = "Ln(Earnings per hour)",
     sub = "N = 4733",
     col = "pink")
plot(x = log(df$educ),
     y = log(df$wage),
     main = "ln(Wage) vs ln(Education)",
     xlab = "ln(Years of Education)",
     ylab = "Ln(Earnings per hour)",
     sub = "N = 4733",
     col = "pink")

# Regress 5 models

Model 1: ln(wage) on ln(educ)

model1 <- lmrob(log(df$wage)~log(df$educ))

Model 2: ln(wage) on educ

model2 <- lmrob(log(df$wage)~df$educ)

Model 3: ln(wage) on educ and exper

model3 <- lmrob(log(df$wage)~ df$educ + df$exper)

Model 4: ln(wage) on educ, exper, and female

model4 <- lmrob(log(df$wage)~ df$educ + df$exper + df$female)

Model 5: ln(wage) on educ, exper, female, and female_educ

female_educ is an interaction variable (female × educ) that we need to add first

df$female_educ <- df$female * df$educ
head(df)
  wage educ age exper female black white married union northeast midwest south west fulltime metro female_educ
  1.05   12  37    19      0     0     1       0     1         0       0     0    1        1     1           0
  1.05   13  42    23      0     0     1       0     0         1       0     0    0        1     1           0
  1.23    8  54    40      0     0     1       0     0         0       0     0    1        1     1           0
  1.28   10  59    43      1     0     1       1     1         1       0     0    0        0     1          10
  1.34   18  28     4      1     0     1       0     0         0       0     0    1        1     0          18
  1.47   12  40    22      1     0     1       1     1         0       1     0    0        0     1          12
model5 <- lmrob(log(df$wage)~ df$educ + df$exper + df$female + df$female_educ)
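For reference, the same interaction can be written directly in the formula instead of building the column by hand; model5_alt is just an illustrative name:

# Sketch: equivalent model using R's interaction syntax in the formula.
model5_alt <- lmrob(log(wage) ~ educ + exper + female + female:educ, data = df)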

Create a side-by-side table using the 5 models

SBST <- huxreg(model1, model2, model3, model4, model5,
              statistics = character(0),
              number_format = 0)
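SBST is built but never displayed or saved; printing it shows the table, and huxtable's quick_html() can write it to a file. A sketch (the file name is illustrative):

# Sketch: show the huxtable and export it to HTML.
SBST
quick_html(SBST, file = "side_by_side_models.html")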

Using another package (modelsummary)

models <- list("Model 1" = model1,
               "Model 2" = model2,
               "Model 3" = model3,
               "Model 4" = model4,
               "Model 5" = model5)
modelsummary(models, 
             fmt = 2,
             stars = TRUE,
             output = "table.html")
plot(model1)
## recomputing robust Mahalanobis distances
## saving the robust distances 'MD' as part of 'model1'

plot(model2)
## recomputing robust Mahalanobis distances
## saving the robust distances 'MD' as part of 'model2'

Extending model 5 to incorporate region dummies

model51 <- lmrob(log(df$wage)~ df$educ + df$exper + df$female + df$female_educ + df$northeast + df$midwest + df$south)
stargazer(model51,
          title = "Ln(Wage) on Education",
          dep.var.caption = "DV: Earnings per hour (wage)",
          notes.label = "N = 4733 | Significance level:",
          keep.stat = c("n","rsq"),
          digits = 2,
          type = "html",
          out = "Economretics4.html")
## 
## <table style="text-align:center"><caption><strong>Ln(Wage) on Education</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td>DV: Earnings per hour (wage)</td></tr>
## <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td>wage)</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">educ</td><td>0.11<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.004)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td style="text-align:left">exper</td><td>0.01<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.001)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td style="text-align:left">female</td><td>-0.57<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.08)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td style="text-align:left">female_educ</td><td>0.02<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.01)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td style="text-align:left">northeast</td><td>0.04<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.02)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td style="text-align:left">midwest</td><td>0.001</td></tr>
## <tr><td style="text-align:left"></td><td>(0.02)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td style="text-align:left">south</td><td>-0.06<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.02)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td style="text-align:left">Constant</td><td>0.62<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.05)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left">Observations</td><td>4,733</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.35</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">N = 4733 | Significance level:</td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>

F-test: do the regions jointly matter for earnings? (H0: the region coefficients are jointly zero)

Full and reduced models

full <- lm(log(df$wage)~ df$educ + df$exper + df$female + df$female_educ + df$northeast + df$midwest + df$south)
reduced <- lm(log(df$wage)~ df$educ + df$exper + df$female + df$female_educ)
anova(reduced, full)
  Res.Df RSS Df Sum of Sq    F   Pr(>F)
1   4728 977                           
2   4725 971  3      6.38 10.4 8.59e-07

With F = 10.4 and p = 8.59e-07, we reject the null that the region dummies are jointly zero, so region does matter for earnings.
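The same joint restriction can also be checked with car's linearHypothesis() on the full OLS fit; the coefficient names follow the df$ form used in the formula. A sketch:

# Sketch: equivalent joint F-test of the three region dummies via the car package.
linearHypothesis(full, c("df$northeast = 0", "df$midwest = 0", "df$south = 0"))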