# Step 1: install packages
# Only packages that are not yet available are installed, so re-running the
# script neither re-downloads everything nor needs network access when all
# dependencies are already present.
required_packages <- c("readxl", "ggplot2", "stargazer")
is_installed <- vapply(
  required_packages, requireNamespace, logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  # Select the first CRAN mirror non-interactively before installing.
  chooseCRANmirror(graphics = FALSE, ind = 1)
  install.packages(missing_packages)
}
# Paso 2: Cargar paquetes
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.1
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.1
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Step 3: load the data sets
# EAWE21 workbook, read from a machine-specific absolute path.
file_path1 <- "C:/Users/Hp/OneDrive/Documents/Clases II Semestre 2024/Analisis Econometrico I/Data sets/Dougherty/Educational Attainment and Wage Equations (EAWE)/eawe_excel/EAWE21.xlsx"
# Read the workbook once: first sheet, first row used as column names.
# (A preceding bare read_excel(file_path1) call was removed; its result was
# overwritten immediately by this one.)
EAWE21 <- read_excel(file_path1, sheet = 1, col_names = TRUE)
# Column names, as recorded from the original run.
names(EAWE21)
## [1] "ID" "FEMALE" "MALE" "BYEAR" "AGE" "AGEMBTH"
## [7] "HHINC97" "POVRAT97" "HHBMBF" "HHBMOF" "HHOMBF" "HHBMONLY"
## [13] "HHBFONLY" "HHOTHER" "MSA97NO" "MSA97NCC" "MSA97CC" "MSA97NK"
## [19] "ETHBLACK" "ETHHISP" "ETHWHITE" "S" "EDUCPROF" "EDUCPHD"
## [25] "EDUCMAST" "EDUCBA" "EDUCAA" "EDUCHSD" "EDUCGED" "EDUCDO"
## [31] "PRMONM" "PRMONF" "PRMSTYUN" "PRMSTYPE" "PRMSTYAN" "PRMSTYAE"
## [37] "PRFSTYUN" "PRFSTYPE" "PRFSTYAN" "PRFSTYAE" "SINGLE" "MARRIED"
## [43] "COHABIT" "OTHSING" "FAITHN" "FAITHP" "FAITHC" "FAITHJ"
## [49] "FAITHO" "FAITHM" "ASVABAR" "ASVABWK" "ASVABPC" "ASVABMK"
## [55] "ASVABNO" "ASVABCS" "ASVABC" "ASVABC4" "VERBAL" "ASVABMV"
## [61] "HEIGHT" "WEIGHT04" "WEIGHT11" "SF" "SM" "SFR"
## [67] "SMR" "SIBLINGS" "REG97NE" "REG97NC" "REG97S" "REG97W"
## [73] "RS97RURL" "RS97URBN" "RS97UNKN" "JOBS" "EARNINGS" "HOURS"
## [79] "TENURE" "CATGOV" "CATPRI" "CATNPO" "CATMIS" "CATSE"
## [85] "COLLBARG" "URBAN" "REGNE" "REGNC" "REGW" "REGS"
## [91] "MSA11NO" "MSA11NCC" "MSA11CC" "MSA11NK" "MSA11NIC" "EXP"
# Preview of the tibble (500 rows, 96 columns), as recorded.
print(EAWE21)
## # A tibble: 500 × 96
## ID FEMALE MALE BYEAR AGE AGEMBTH HHINC97 POVRAT97 HHBMBF HHBMOF HHOMBF
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 8824 0 1 1980 31 29 NA NA 1 0 0
## 2 2926 0 1 1982 29 19 61900 490 1 0 0
## 3 1817 0 1 1983 28 29 47015 251 1 0 0
## 4 4813 0 1 1982 29 31 112800 893 1 0 0
## 5 5724 0 1 1981 30 21 34700 275 0 0 0
## 6 3558 0 1 1984 27 21 65000 409 1 0 0
## 7 3796 0 1 1984 27 17 39000 186 1 0 0
## 8 146 0 1 1984 27 19 47000 295 1 0 0
## 9 1735 0 1 1980 31 31 39600 211 0 1 0
## 10 5234 0 1 1984 27 26 78000 417 1 0 0
## # ℹ 490 more rows
## # ℹ 85 more variables: HHBMONLY <dbl>, HHBFONLY <dbl>, HHOTHER <dbl>,
## # MSA97NO <dbl>, MSA97NCC <dbl>, MSA97CC <dbl>, MSA97NK <dbl>,
## # ETHBLACK <dbl>, ETHHISP <dbl>, ETHWHITE <dbl>, S <dbl>, EDUCPROF <dbl>,
## # EDUCPHD <dbl>, EDUCMAST <dbl>, EDUCBA <dbl>, EDUCAA <dbl>, EDUCHSD <dbl>,
## # EDUCGED <dbl>, EDUCDO <dbl>, PRMONM <dbl>, PRMONF <dbl>, PRMSTYUN <dbl>,
## # PRMSTYPE <dbl>, PRMSTYAN <dbl>, PRMSTYAE <dbl>, PRFSTYUN <dbl>, …
# OECD employment and GDP growth rates workbook, read from a machine-specific
# absolute path.
file_path2 <- "C:/Users/Hp/OneDrive/Documents/Clases II Semestre 2024/Analisis Econometrico I/Data sets/Dougherty/OECD Employment and GDP growth rates/oecd.xls"
# Read the workbook once: first sheet, first row used as column names.
# (A preceding bare read_excel(file_path2) call was removed; its result was
# overwritten immediately by this one.) The sheet has two columns named
# EMPLOY, which readxl renames by position, as the message below records.
oecd <- read_excel(file_path2, sheet = 1, col_names = TRUE)
## New names:
## • `EMPLOY` -> `EMPLOY...5`
## • `EMPLOY` -> `EMPLOY...10`
names(oecd)
## [1] "country" "WAGES" "PRICES" "GDP" "EMPLOY...5"
## [6] "MONEY1" "MONEY2" "UNEMPLOY" "Z" "EMPLOY...10"
# Preview of the raw data (26 rows, 10 columns), as recorded.
print(oecd)
## # A tibble: 26 × 10
## country WAGES PRICES GDP EMPLOY...5 MONEY1 MONEY2 UNEMPLOY Z
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ausstralia 4.41 3.75 3.04 1.68 9.1 1.10e1 8.68 0.329
## 2 Austria 4.15 2.71 2.55 0.65 5.37 7.37e0 5.48 0.392
## 3 Belgium 3.99 2.37 2.16 0.34 5.87 -1.00e6 8.49 0.463
## 4 Canada 3.76 2.83 2.03 1.17 6.13 8.51e0 9.51 0.493
## 5 Denmark 3.78 2.61 2.02 0.02 3.21 4.51e0 7.68 0.495
## 6 Finland 5.65 3.11 1.78 -1.06 5.97 -1.00e6 10.4 0.562
## 7 France 3.55 2.4 2.08 0.28 5.19 4.05e0 10.8 0.481
## 8 Germany 4.08 2.78 2.71 0.08 9.08 7.59e0 6.86 0.369
## 9 Greece 14.2 13.1 2.08 0.87 14.5 1.41e1 8.84 0.481
## 10 Iceland -999999 8.42 1.54 -0.13 10.7 1.36e1 3.07 0.649
## # ℹ 16 more rows
## # ℹ 1 more variable: EMPLOY...10 <dbl>
# The preview shows -999999 in WAGES (row 10) and values displayed as -1.00e6
# in MONEY2 (rows 3 and 6).
# NOTE(review): these look like missing-value codes rather than real growth
# rates -- confirm against the data set documentation. Exact -999999 entries
# in numeric columns are recoded to NA so they cannot distort summaries or
# models built on those columns.
is_numeric_col <- vapply(oecd, is.numeric, logical(1))
oecd[is_numeric_col] <- lapply(
  oecd[is_numeric_col],
  function(values) replace(values, values %in% -999999, NA)
)
# Step 4: exploratory analysis of the EAWE21 data
# Numeric summaries of EARNINGS and S (output as recorded).
summary(EAWE21[["EARNINGS"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 11.98 17.00 19.58 23.93 103.85
summary(EAWE21[["S"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.00 12.00 15.00 14.87 17.00 20.00
# One histogram per variable, each with a solid vertical line at the median,
# a dashed vertical line at the mean, and a legend identifying the two.
# invisible() keeps the list returned by Map() from being printed.
invisible(Map(
  function(values, title, x_label) {
    hist(values, main = title, xlab = x_label)
    abline(v = median(values))
    abline(v = mean(values), lty = "dashed")
    legend(
      "topright",
      legend = c("Median", "Mean"),
      lty = c("solid", "dashed")
    )
  },
  list(EAWE21$EARNINGS, EAWE21$S),
  c("Salario en dolares por hora", "Escolaridad en años de educación"),
  c("Salario (en dolares)", "Escolaridad (en años)")
))

# Schooling (S) by sex, base graphics. MALE is recoded to a labelled factor
# only further down, so on a first run the groups appear under their raw 0/1
# codes; the axis label spells out what the codes mean.
boxplot(
  S ~ MALE,
  data = EAWE21,
  main = "Boxplot de Escolaridad en función del Sexo",
  xlab = "Sexo (0 = Mujer, 1 = Hombre)",
  ylab = "Escolaridad"
)

# Recode MALE (0/1) as a labelled factor for ggplot. The guard makes the step
# safe to re-run: applying factor(levels = c(0, 1)) to the already-labelled
# factor would turn every value into NA.
if (!is.factor(EAWE21$MALE)) {
  EAWE21$MALE <- factor(
    EAWE21$MALE,
    levels = c(0, 1),
    labels = c("Mujer", "Hombre")
  )
}
ggplot(EAWE21, aes(x = MALE, y = S, fill = MALE)) +
  geom_boxplot() +
  labs(
    title = "Boxplot de Escolaridad en función del Sexo",
    x = "Sexo",
    y = "Escolaridad"
  )

# Schooling (S) by ethnicity, base graphics, with the raw 0/1 codes of
# ETHWHITE explained in the axis label.
boxplot(
  S ~ ETHWHITE,
  data = EAWE21,
  main = "Boxplot de Escolaridad en función de la etnia",
  xlab = "Etnia (0 = Otra etnia, 1 = Persona blanca)",
  ylab = "Escolaridad"
)

# Same guarded recode for ETHWHITE before the ggplot version.
if (!is.factor(EAWE21$ETHWHITE)) {
  EAWE21$ETHWHITE <- factor(
    EAWE21$ETHWHITE,
    levels = c(0, 1),
    labels = c("Otra etnia", "Persona blanca")
  )
}
ggplot(EAWE21, aes(x = ETHWHITE, y = S, fill = ETHWHITE)) +
  geom_boxplot() +
  labs(
    title = "Boxplot de Escolaridad en función de la etnia",
    x = "Etnia",
    y = "Escolaridad"
  )

# Scatter plot of hourly earnings against years of schooling. Explicit axis
# labels replace the default ones, which would otherwise be the raw
# expressions "EAWE21$S" and "EAWE21$EARNINGS".
plot(
  EAWE21$S, EAWE21$EARNINGS,
  main = "Gráfico de dispersion de ingresos y escolaridad",
  xlab = "Escolaridad (en años)",
  ylab = "Salario (en dolares)"
)

# Pearson correlation between schooling and earnings (output as recorded).
cor(EAWE21$S, EAWE21$EARNINGS)
## [1] 0.2924262
# Step 5: model with intercept
# Simple regression of EARNINGS on S. The variables are named in the formula
# and looked up through `data =` rather than written as EAWE21$EARNINGS and
# EAWE21$S: this gives clean term labels ("S") and lets predict() accept
# new data.
# The recorded output below is from the original run; only the call and the
# term labels are updated to match the formula. The estimates are unaffected.
mrls <- lm(EARNINGS ~ S, data = EAWE21)
summary(mrls)
##
## Call:
## lm(formula = EARNINGS ~ S, data = EAWE21)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.079 -6.726 -2.203 3.451 79.037
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7647 2.8038 0.273 0.785
## S 1.2657 0.1855 6.824 2.58e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.36 on 498 degrees of freedom
## Multiple R-squared: 0.08551, Adjusted R-squared: 0.08368
## F-statistic: 46.57 on 1 and 498 DF, p-value: 2.579e-11
# Components stored in the fitted lm object.
attributes(mrls)
## $names
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
##
## $class
## [1] "lm"
# Print the regression table as text and also write it to a file.
# NOTE(review): the output file name is kept exactly as it was (including the
# "inresos" spelling) so that existing references to the file keep working.
stargazer(mrls, type = "text", out = "modelo ecuación de inresos simple.doc")
##
## ===============================================
## Dependent variable:
## ---------------------------
## EARNINGS
## -----------------------------------------------
## S 1.266***
## (0.185)
##
## Constant 0.765
## (2.804)
##
## -----------------------------------------------
## Observations 500
## R2 0.086
## Adjusted R2 0.084
## Residual Std. Error 11.364 (df = 498)
## F Statistic 46.568*** (df = 1; 498)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Scatter plot with the fitted line drawn once, in red with double width.
# (A plain abline(mrls) used to be drawn first and was completely covered by
# this line.)
plot(
  EARNINGS ~ S,
  data = EAWE21,
  main = "Scatterplot of earnings by years of schooling"
)
abline(mrls, col = 2, lwd = 2)

# Distribution of the residuals and of the fitted values.
hist(mrls$residuals)

hist(mrls$fitted.values)

# Confidence intervals for the coefficients at 95% (default), 99% and 90%.
confint(mrls)
## 2.5 % 97.5 %
## (Intercept) -4.7439817 6.273351
## S 0.9012959 1.630128
confint(mrls, level = 0.99)
## 0.5 % 99.5 %
## (Intercept) -6.4851159 8.014485
## S 0.7861142 1.745310
confint(mrls, level = 0.90)
## 5 % 95 %
## (Intercept) -3.855693 5.385062
## S 0.960059 1.571365
# Analysis-of-variance table for the fitted model.
anova(mrls)
## Analysis of Variance Table
##
## Response: EARNINGS
## Df Sum Sq Mean Sq F value Pr(>F)
## S 1 6014 6014.0 46.568 2.579e-11 ***
## Residuals 498 64315 129.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Step 5 (continued): model without intercept
# Regression of EARNINGS on S through the origin ("- 1" drops the intercept).
# Variables are looked up through `data =` instead of EAWE21$..., which gives
# clean term labels and lets predict() accept new data.
# The recorded output below is from the original run; only the call and the
# term label are updated to match the formula.
mrls2 <- lm(EARNINGS ~ S - 1, data = EAWE21)
# Without an intercept, summary() measures R-squared against a baseline of
# zero instead of the mean of EARNINGS, so the 0.7545 reported here is not
# comparable with the R-squared of a model that includes an intercept.
summary(mrls2)
##
## Call:
## lm(formula = EARNINGS ~ S - 1, data = EAWE21)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.309 -6.612 -2.197 3.541 78.856
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## S 1.31546 0.03359 39.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.35 on 499 degrees of freedom
## Multiple R-squared: 0.7545, Adjusted R-squared: 0.754
## F-statistic: 1534 on 1 and 499 DF, p-value: < 2.2e-16
# Scatter plot with the fitted line drawn once, in red with double width.
# (A plain abline(mrls2) used to be drawn first and was completely covered by
# this line.)
plot(
  EARNINGS ~ S,
  data = EAWE21,
  main = "Scatterplot of earnings by years of schooling"
)
abline(mrls2, col = 2, lwd = 2)

# Step 6: simple linear regression with the OECD data
# Exploratory scatter plot of GDP against the first EMPLOY column
# (EMPLOY...5, the name readxl gave to the first of two EMPLOY columns).
plot(
  GDP ~ EMPLOY...5,
  data = oecd,
  main = "Scatterplot of GDP growth by Employment Growth"
)
# Regression of GDP on EMPLOY...5. Variables are looked up through `data =`
# instead of oecd$..., which gives clean term labels and lets predict()
# accept new data.
# The recorded output below is from the original run; only the call and the
# term label are updated to match the formula.
mrls3 <- lm(GDP ~ EMPLOY...5, data = oecd)
summary(mrls3)
##
## Call:
## lm(formula = GDP ~ EMPLOY...5, data = oecd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8939 -0.6331 -0.1560 0.5193 3.6949
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0606 0.3186 6.468 1.09e-06 ***
## EMPLOY...5 0.7683 0.2049 3.750 0.000988 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.264 on 24 degrees of freedom
## Multiple R-squared: 0.3695, Adjusted R-squared: 0.3432
## F-statistic: 14.06 on 1 and 24 DF, p-value: 0.0009883
# Components stored in the fitted lm object.
attributes(mrls3)
## $names
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
##
## $class
## [1] "lm"
# Same scatter plot with the fitted line drawn once, in red with double
# width. (A plain abline(mrls3) used to be drawn first and was completely
# covered by this line.)
plot(
  GDP ~ EMPLOY...5,
  data = oecd,
  main = "Scatterplot of GDP growth by Employment Growth"
)
abline(mrls3, col = 2, lwd = 2)

# Distribution of the residuals.
hist(mrls3$residuals)

# Exploratory scatter plot of UNEMPLOY against GDP (title typo
# "Unenployment" corrected).
plot(
  UNEMPLOY ~ GDP,
  data = oecd,
  main = "Scatterplot of Unemployment by GDP growth"
)
# Regression of UNEMPLOY on GDP. Variables are looked up through `data =`
# instead of oecd$..., which gives clean term labels and lets predict()
# accept new data.
# The recorded output below is from the original run; only the call and the
# term label are updated to match the formula.
mrls4 <- lm(UNEMPLOY ~ GDP, data = oecd)
summary(mrls4)
##
## Call:
## lm(formula = UNEMPLOY ~ GDP, data = oecd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.9402 -2.1772 -0.2095 1.3518 12.6206
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.0843 1.6362 4.941 4.83e-05 ***
## GDP -0.2557 0.5113 -0.500 0.621
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.986 on 24 degrees of freedom
## Multiple R-squared: 0.01032, Adjusted R-squared: -0.03092
## F-statistic: 0.2502 on 1 and 24 DF, p-value: 0.6215
# Same scatter plot with the fitted line drawn once, in red with double
# width. (A plain abline(mrls4) used to be drawn first and was completely
# covered by this line.)
plot(
  UNEMPLOY ~ GDP,
  data = oecd,
  main = "Scatterplot of Unemployment by GDP growth"
)
abline(mrls4, col = 2, lwd = 2)
