# Step 1: Install the required packages, but only those not yet available.
# Skipping packages that are already installed means re-running the script
# does not re-download them, does not need network access, and does not try
# to overwrite a package that is currently loaded.
required_pkgs <- c("readxl", "ggplot2", "stargazer")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  # Select the first CRAN mirror without opening a graphical chooser.
  chooseCRANmirror(graphics = FALSE, ind = 1)
  install.packages(missing_pkgs)
}
# Paso 2: Cargar paquetes
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.1
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.1
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Step 3: Load the data sets
# Absolute path to the EAWE21 workbook (machine-specific).
file_path1 <- "C:/Users/Hp/OneDrive/Documents/Clases II Semestre 2024/Analisis Econometrico I/Data sets/Dougherty/Educational Attainment and Wage Equations (EAWE)/eawe_excel/EAWE21.xlsx"

# Read the first sheet once, taking the column names from the header row.
# (A second, identical read of the same file is unnecessary.)
EAWE21 <- read_excel(file_path1, sheet = 1, col_names = TRUE)
names(EAWE21)
##  [1] "ID"       "FEMALE"   "MALE"     "BYEAR"    "AGE"      "AGEMBTH" 
##  [7] "HHINC97"  "POVRAT97" "HHBMBF"   "HHBMOF"   "HHOMBF"   "HHBMONLY"
## [13] "HHBFONLY" "HHOTHER"  "MSA97NO"  "MSA97NCC" "MSA97CC"  "MSA97NK" 
## [19] "ETHBLACK" "ETHHISP"  "ETHWHITE" "S"        "EDUCPROF" "EDUCPHD" 
## [25] "EDUCMAST" "EDUCBA"   "EDUCAA"   "EDUCHSD"  "EDUCGED"  "EDUCDO"  
## [31] "PRMONM"   "PRMONF"   "PRMSTYUN" "PRMSTYPE" "PRMSTYAN" "PRMSTYAE"
## [37] "PRFSTYUN" "PRFSTYPE" "PRFSTYAN" "PRFSTYAE" "SINGLE"   "MARRIED" 
## [43] "COHABIT"  "OTHSING"  "FAITHN"   "FAITHP"   "FAITHC"   "FAITHJ"  
## [49] "FAITHO"   "FAITHM"   "ASVABAR"  "ASVABWK"  "ASVABPC"  "ASVABMK" 
## [55] "ASVABNO"  "ASVABCS"  "ASVABC"   "ASVABC4"  "VERBAL"   "ASVABMV" 
## [61] "HEIGHT"   "WEIGHT04" "WEIGHT11" "SF"       "SM"       "SFR"     
## [67] "SMR"      "SIBLINGS" "REG97NE"  "REG97NC"  "REG97S"   "REG97W"  
## [73] "RS97RURL" "RS97URBN" "RS97UNKN" "JOBS"     "EARNINGS" "HOURS"   
## [79] "TENURE"   "CATGOV"   "CATPRI"   "CATNPO"   "CATMIS"   "CATSE"   
## [85] "COLLBARG" "URBAN"    "REGNE"    "REGNC"    "REGW"     "REGS"    
## [91] "MSA11NO"  "MSA11NCC" "MSA11CC"  "MSA11NK"  "MSA11NIC" "EXP"
print(EAWE21)
## # A tibble: 500 × 96
##       ID FEMALE  MALE BYEAR   AGE AGEMBTH HHINC97 POVRAT97 HHBMBF HHBMOF HHOMBF
##    <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>  <dbl>  <dbl>  <dbl>
##  1  8824      0     1  1980    31      29      NA       NA      1      0      0
##  2  2926      0     1  1982    29      19   61900      490      1      0      0
##  3  1817      0     1  1983    28      29   47015      251      1      0      0
##  4  4813      0     1  1982    29      31  112800      893      1      0      0
##  5  5724      0     1  1981    30      21   34700      275      0      0      0
##  6  3558      0     1  1984    27      21   65000      409      1      0      0
##  7  3796      0     1  1984    27      17   39000      186      1      0      0
##  8   146      0     1  1984    27      19   47000      295      1      0      0
##  9  1735      0     1  1980    31      31   39600      211      0      1      0
## 10  5234      0     1  1984    27      26   78000      417      1      0      0
## # ℹ 490 more rows
## # ℹ 85 more variables: HHBMONLY <dbl>, HHBFONLY <dbl>, HHOTHER <dbl>,
## #   MSA97NO <dbl>, MSA97NCC <dbl>, MSA97CC <dbl>, MSA97NK <dbl>,
## #   ETHBLACK <dbl>, ETHHISP <dbl>, ETHWHITE <dbl>, S <dbl>, EDUCPROF <dbl>,
## #   EDUCPHD <dbl>, EDUCMAST <dbl>, EDUCBA <dbl>, EDUCAA <dbl>, EDUCHSD <dbl>,
## #   EDUCGED <dbl>, EDUCDO <dbl>, PRMONM <dbl>, PRMONF <dbl>, PRMSTYUN <dbl>,
## #   PRMSTYPE <dbl>, PRMSTYAN <dbl>, PRMSTYAE <dbl>, PRFSTYUN <dbl>, …
# Absolute path to the OECD employment / GDP growth workbook (machine-specific).
file_path2 <- "C:/Users/Hp/OneDrive/Documents/Clases II Semestre 2024/Analisis Econometrico I/Data sets/Dougherty/OECD Employment and GDP growth rates/oecd.xls"

# Read the first sheet once. The sheet has two columns called EMPLOY, so
# readxl makes the names unique by appending the column position
# (EMPLOY...5 and EMPLOY...10), as the message below reports.
oecd <- read_excel(file_path2, sheet = 1, col_names = TRUE)
## New names:
## • `EMPLOY` -> `EMPLOY...5`
## • `EMPLOY` -> `EMPLOY...10`
names(oecd)
##  [1] "country"     "WAGES"       "PRICES"      "GDP"         "EMPLOY...5" 
##  [6] "MONEY1"      "MONEY2"      "UNEMPLOY"    "Z"           "EMPLOY...10"
print(oecd)
## # A tibble: 26 × 10
##    country         WAGES PRICES   GDP EMPLOY...5 MONEY1    MONEY2 UNEMPLOY     Z
##    <chr>           <dbl>  <dbl> <dbl>      <dbl>  <dbl>     <dbl>    <dbl> <dbl>
##  1 Ausstralia       4.41   3.75  3.04       1.68   9.1     1.10e1     8.68 0.329
##  2 Austria          4.15   2.71  2.55       0.65   5.37    7.37e0     5.48 0.392
##  3 Belgium          3.99   2.37  2.16       0.34   5.87   -1.00e6     8.49 0.463
##  4 Canada           3.76   2.83  2.03       1.17   6.13    8.51e0     9.51 0.493
##  5 Denmark          3.78   2.61  2.02       0.02   3.21    4.51e0     7.68 0.495
##  6 Finland          5.65   3.11  1.78      -1.06   5.97   -1.00e6    10.4  0.562
##  7 France           3.55   2.4   2.08       0.28   5.19    4.05e0    10.8  0.481
##  8 Germany          4.08   2.78  2.71       0.08   9.08    7.59e0     6.86 0.369
##  9 Greece          14.2   13.1   2.08       0.87  14.5     1.41e1     8.84 0.481
## 10 Iceland    -999999      8.42  1.54      -0.13  10.7     1.36e1     3.07 0.649
## # ℹ 16 more rows
## # ℹ 1 more variable: EMPLOY...10 <dbl>

# The raw data printed above contain -999999 in WAGES (Iceland) and about -1e6
# in MONEY2 (Belgium, Finland). These look like missing-value codes rather than
# real growth rates -- TODO confirm against the data set documentation.
# Recode exact matches to NA in every numeric column so they cannot silently
# distort later summaries or regressions. The print above shows the data
# before this recoding.
missing_code <- -999999
numeric_cols <- vapply(oecd, is.numeric, logical(1))
oecd[numeric_cols] <- lapply(
  oecd[numeric_cols],
  function(column) replace(column, column %in% missing_code, NA_real_)
)
# Step 4: Exploratory analysis of the EAWE21 data

# Summary statistics of the EARNINGS and S columns.
summary(EAWE21[["EARNINGS"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   11.98   17.00   19.58   23.93  103.85
summary(EAWE21[["S"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    8.00   12.00   15.00   14.87   17.00   20.00

# Draw a histogram of `values` and mark its mean (dashed vertical line) and
# median (solid vertical line), with a legend in the top-right corner.
#   values  numeric vector to plot
#   title   main title of the histogram
#   x_label label of the x axis
draw_hist_with_centers <- function(values, title, x_label) {
  hist(values, main = title, xlab = x_label)
  abline(v = mean(values), lty = "dashed")
  abline(v = median(values))
  legend(
    "topright",
    legend = c("Median", "Mean"),
    lty = c("solid", "dashed")
  )
  invisible(NULL)
}

draw_hist_with_centers(
  EAWE21[["EARNINGS"]],
  title = "Salario en dolares por hora",
  x_label = "Salario (en dolares)"
)

draw_hist_with_centers(
  EAWE21[["S"]],
  title = "Escolaridad en años de educación",
  x_label = "Escolaridad (en años)"
)

# Base-graphics boxplot of schooling (S) grouped by the MALE column.
boxplot(
  S ~ MALE,
  data = EAWE21,
  main = "Boxplot de y en función de x",
  xlab = "Categoría de x",
  ylab = "Valores de y"
)

# Replace the 0/1 MALE column with a labelled factor for the ggplot version.
# The guard makes this step safe to re-run: applying factor(levels = c(0, 1))
# to a column that is already a labelled factor would match no level and
# turn every value into NA.
if (!is.factor(EAWE21$MALE)) {
  EAWE21$MALE <- factor(
    EAWE21$MALE,
    levels = c(0, 1),
    labels = c("Mujer", "Hombre")
  )
}
ggplot(EAWE21, aes(x = MALE, y = S, fill = MALE)) +
  geom_boxplot() +
  labs(
    title = "Boxplot de Escolaridad en función del Sexo",
    x = "Sexo",
    y = "Escolaridad"
  )

# Base-graphics boxplot of schooling (S) grouped by the ETHWHITE column.
boxplot(
  S ~ ETHWHITE,
  data = EAWE21,
  main = "Boxplot de y en función de x",
  xlab = "Categoría de x",
  ylab = "Valores de y"
)

# Same labelled-factor conversion for ETHWHITE, with the same re-run guard.
if (!is.factor(EAWE21$ETHWHITE)) {
  EAWE21$ETHWHITE <- factor(
    EAWE21$ETHWHITE,
    levels = c(0, 1),
    labels = c("Otra etnia", "Persona blanca")
  )
}
ggplot(EAWE21, aes(x = ETHWHITE, y = S, fill = ETHWHITE)) +
  geom_boxplot() +
  labs(
    title = "Boxplot de Escolaridad en función de la etnia",
    x = "Etnia",
    y = "Escolaridad"
  )

# Scatter plot of earnings (y axis) against years of schooling (x axis).
plot(
  x = EAWE21$S,
  y = EAWE21$EARNINGS,
  main = "Gráfico de dispersion de ingresos y escolaridad"
)

# Correlation coefficient between schooling and earnings (cor() default method).
cor(x = EAWE21$S, y = EAWE21$EARNINGS)
## [1] 0.2924262
# Step 5: Simple linear regression with an intercept

# Regress earnings (EARNINGS) on years of schooling (S). The variables are
# named in the formula and looked up through `data =` rather than written as
# `EAWE21$...` terms, so the slope is labelled `S` and the fitted model can be
# used with predict(newdata = ...).
mrls <- lm(EARNINGS ~ S, data = EAWE21)
summary(mrls)
## 
## Call:
## lm(formula = EARNINGS ~ S, data = EAWE21)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.079  -6.726  -2.203   3.451  79.037 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.7647     2.8038   0.273    0.785    
## S             1.2657     0.1855   6.824 2.58e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.36 on 498 degrees of freedom
## Multiple R-squared:  0.08551,    Adjusted R-squared:  0.08368 
## F-statistic: 46.57 on 1 and 498 DF,  p-value: 2.579e-11

# List the components stored in the fitted "lm" object.
attributes(mrls)
## $names
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"        
## 
## $class
## [1] "lm"

# Print the regression table as plain text and also write it to the file
# named in `out`.
stargazer(mrls, type = "text", out = "modelo ecuación de inresos simple.doc")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                              EARNINGS          
## -----------------------------------------------
## S                            1.266***          
##                               (0.185)          
##                                                
## Constant                       0.765           
##                               (2.804)          
##                                                
## -----------------------------------------------
## Observations                    500            
## R2                             0.086           
## Adjusted R2                    0.084           
## Residual Std. Error      11.364 (df = 498)     
## F Statistic           46.568*** (df = 1; 498)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

# Scatter plot with the fitted regression line drawn once, in red (col = 2)
# and double width. A thinner default line under it would be fully covered.
plot(EAWE21$S, EAWE21$EARNINGS, main="Scatterplot of earnings by years of schooling")
abline(mrls, col = 2, lwd = 2)

# Distribution of the residuals and of the fitted values.
hist(mrls$residuals)

hist(mrls$fitted.values)

# Confidence intervals for the coefficients at 95% (default), 99% and 90%.
confint(mrls)
##                  2.5 %   97.5 %
## (Intercept) -4.7439817 6.273351
## S            0.9012959 1.630128
confint(mrls, level = 0.99)
##                  0.5 %   99.5 %
## (Intercept) -6.4851159 8.014485
## S            0.7861142 1.745310
confint(mrls, level = 0.90)
##                   5 %     95 %
## (Intercept) -3.855693 5.385062
## S            0.960059 1.571365

# Analysis-of-variance table for the fitted model.
anova(mrls)
## Analysis of Variance Table
## 
## Response: EARNINGS
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## S           1   6014  6014.0  46.568 2.579e-11 ***
## Residuals 498  64315   129.1                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Step 5b: Simple linear regression without an intercept

# `- 1` removes the intercept, forcing the fitted line through the origin.
# The variables are looked up through `data =` rather than `EAWE21$...` terms,
# so the slope is labelled `S` and predict(newdata = ...) works.
mrls2 <- lm(EARNINGS ~ S - 1, data = EAWE21)

# NOTE: for a model without an intercept, summary.lm() computes R-squared
# against zero instead of against the mean of the response, so the 0.7545
# below is not comparable with the R-squared of a model with an intercept.
summary(mrls2)
## 
## Call:
## lm(formula = EARNINGS ~ S - 1, data = EAWE21)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.309  -6.612  -2.197   3.541  78.856 
## 
## Coefficients:
##   Estimate Std. Error t value Pr(>|t|)    
## S  1.31546    0.03359   39.16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.35 on 499 degrees of freedom
## Multiple R-squared:  0.7545, Adjusted R-squared:  0.754 
## F-statistic:  1534 on 1 and 499 DF,  p-value: < 2.2e-16

# Scatter plot with the fitted line drawn once, in red (col = 2) and double
# width. With a single coefficient, abline() treats it as the slope of a line
# through the origin.
plot(EAWE21$S, EAWE21$EARNINGS, main="Scatterplot of earnings by years of schooling")
abline(mrls2, col = 2, lwd = 2)

# Step 6: Simple linear regressions with the OECD data

# Scatter plot of GDP (y axis) against EMPLOY...5 (x axis).
plot(oecd$EMPLOY...5, oecd$GDP, main="Scatterplot of GDP growth by Employment Growth")

# Regress GDP on EMPLOY...5. The variables are looked up through `data =`
# rather than `oecd$...` terms, so the slope is labelled `EMPLOY...5` and
# predict(newdata = ...) works.
mrls3 <- lm(GDP ~ EMPLOY...5, data = oecd)
summary(mrls3)
## 
## Call:
## lm(formula = GDP ~ EMPLOY...5, data = oecd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8939 -0.6331 -0.1560  0.5193  3.6949 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.0606     0.3186   6.468 1.09e-06 ***
## EMPLOY...5    0.7683     0.2049   3.750 0.000988 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.264 on 24 degrees of freedom
## Multiple R-squared:  0.3695, Adjusted R-squared:  0.3432 
## F-statistic: 14.06 on 1 and 24 DF,  p-value: 0.0009883

# List the components stored in the fitted "lm" object.
attributes(mrls3)
## $names
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"        
## 
## $class
## [1] "lm"

# Same scatter plot with the fitted line drawn once, in red (col = 2) and
# double width. A thinner default line under it would be fully covered.
plot(oecd$EMPLOY...5, oecd$GDP, main="Scatterplot of GDP growth by Employment Growth")
abline(mrls3, col = 2, lwd = 2)

# Distribution of the residuals.
hist(mrls3$residuals)

# Scatter plot of UNEMPLOY (y axis) against GDP (x axis).
plot(oecd$GDP, oecd$UNEMPLOY, main="Scatterplot of Unenployment by GDP growth")

# Regress UNEMPLOY on GDP. The variables are looked up through `data =`
# rather than `oecd$...` terms, so the slope is labelled `GDP` and
# predict(newdata = ...) works.
mrls4 <- lm(UNEMPLOY ~ GDP, data = oecd)
summary(mrls4)
## 
## Call:
## lm(formula = UNEMPLOY ~ GDP, data = oecd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9402 -2.1772 -0.2095  1.3518 12.6206 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   8.0843     1.6362   4.941 4.83e-05 ***
## GDP          -0.2557     0.5113  -0.500    0.621    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.986 on 24 degrees of freedom
## Multiple R-squared:  0.01032,    Adjusted R-squared:  -0.03092 
## F-statistic: 0.2502 on 1 and 24 DF,  p-value: 0.6215

# Same scatter plot with the fitted line drawn once, in red (col = 2) and
# double width. A thinner default line under it would be fully covered.
plot(oecd$GDP, oecd$UNEMPLOY, main="Scatterplot of Unenployment by GDP growth")
abline(mrls4, col = 2, lwd = 2)