# Punto 1
#Punto 2
# Sample data for this exercise: eight observations of the response q, the
# regressors a and b, and the variables c and d (used further down as
# instruments in the IV fit).
q <- c(60323, 61122, 60171, 61187, 63221, 63639, 64989, 63761)
a <- c(830, 885, 882, 895, 962, 981, 990, 1000)
b <- c(
  234289, 259426, 258054, 284599,
  328975, 346999, 365385, 363112
)
c <- c(2356, 2325, 3682, 3351, 2099, 1932, 1870, 3578)
d <- c(1590, 1456, 1616, 1650, 3099, 3594, 3547, 3350)
data <- data.frame(q = q, a = a, b = b, c = c, d = d)

# OLS regression of q on a and b; the fit is kept in `olsi` for the
# anova/predict/modelsummary calls below.
olsi <- lm(q ~ a + b, data = data)
summary(olsi)
## 
## Call:
## lm(formula = q ~ a + b, data = data)
## 
## Residuals:
##       1       2       3       4       5       6       7       8 
##  171.91  548.99 -378.95 -571.66  208.02  -22.47  490.26 -446.11 
## 
## Coefficients:
##                Estimate  Std. Error t value Pr(>|t|)   
## (Intercept) 61591.27049 11410.69431   5.398  0.00295 **
## a             -16.92866    20.10015  -0.842  0.43808   
## b               0.05382     0.02421   2.223  0.07682 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 506.9 on 5 degrees of freedom
## Multiple R-squared:  0.9443, Adjusted R-squared:  0.9221 
## F-statistic: 42.41 on 2 and 5 DF,  p-value: 0.0007312
# Analysis-of-variance table for the OLS fit; its Residuals row supplies the
# mean square that is reused below as the residual variance.
anova(olsi)
## Analysis of Variance Table
## 
## Response: q
##           Df   Sum Sq  Mean Sq F value    Pr(>F)    
## a          1 20519754 20519754 79.8753 0.0002922 ***
## b          1  1269765  1269765  4.9427 0.0768170 .  
## Residuals  5  1284487   256897                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residual variance, transcribed by hand from the "Mean Sq" entry of the
# Residuals row in the anova(olsi) table above, and the residual standard
# deviation obtained as its square root.
# NOTE(review): the value is hard-coded, so it goes stale if the data change.
var2 <- 256897
desviación_estandar <- var2^0.5
desviación_estandar
## [1] 506.8501
# Fitted values of q from the OLS model, one per observation.
predict(olsi)
##        1        2        3        4        5        6        7        8 
## 60151.09 60573.01 60549.95 61758.66 63012.98 63661.47 64498.74 64207.11
# Instrumental-variables regression of q on a and b, with c and d listed as
# the instruments (right-hand side of `|`); the fit is kept in `ivi`.
ivi <- ivreg(q ~ a + b | c + d, data = data)
summary(ivi)
## 
## Call:
## ivreg(formula = q ~ a + b | c + d, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2592.7 -1680.0   353.4  1370.2  2783.7 
## 
## Coefficients:
##                Estimate  Std. Error t value Pr(>|t|)
## (Intercept) 174177.1259 261796.7100   0.665    0.535
## a             -216.1424    463.4645  -0.466    0.661
## b                0.2908      0.5522   0.527    0.621
## 
## Diagnostic tests:
##                      df1 df2 statistic p-value   
## Weak instruments (a)   2   5    27.394 0.00202 **
## Weak instruments (b)   2   5    36.789 0.00102 **
## Wu-Hausman             2   3     6.351 0.08351 . 
## Sargan                 0  NA        NA      NA   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2303 on 5 degrees of freedom
## Multiple R-Squared: -0.1493, Adjusted R-squared: -0.6091 
## Wald test: 1.986 on 2 and 5 DF,  p-value: 0.2319
# Side-by-side table of the OLS fit (`olsi`) and the IV fit (`ivi`).
modelsummary(list("olsi"=olsi,"ivi"=ivi))
tinytable_2p7hwscuq0zzv0of5j36
olsi ivi
(Intercept) 61591.270 174177.126
(11410.694) (261796.710)
a -16.929 -216.142
(20.100) (463.465)
b 0.054 0.291
(0.024) (0.552)
Num.Obs. 8 8
R2 0.944 -0.149
R2 Adj. 0.922 -0.609
AIC 126.6 150.8
BIC 126.9 151.1
Log.Lik. -59.297
F 42.409
RMSE 400.70 1820.71

Interpretación

Para el modelo inicial, estimado por mínimos cuadrados ordinarios, el coeficiente de a es $\hat{\beta}_a = -16.929$ y el de b es $\hat{\beta}_b = 0.054$. Los valores predichos de la variable dependiente q se hallaron utilizando el comando predict y se encuentran en los resultados presentados en la consola. Finalmente, la desviación estándar de los residuos de este modelo es de 506.8501. Para el modelo por variables instrumentales, utilizando c y d como instrumentos de a y b respectivamente, se obtuvo $\hat{\beta}_a = -216.142$ y $\hat{\beta}_b = 0.291$.

# Load the source data.
# NOTE(review): absolute, machine-specific path; the script only runs where
# the workbook exists at exactly this location.
P1t_1_ <- read_excel("C:/Users/LABSIS/Downloads/P1t(1).xlsx")

# NOTE(review): View() is meant for interactive sessions — confirm it is
# wanted when the script is run non-interactively.
View(P1t_1_)
# Number of simulated observations (153247).
n_sim <- 153247
# Simulate the variables. Every column is drawn by its own independent call,
# so relationships between columns of P1t_1_ are not carried over:
#   - ageq, edu, wage: normal draws using the column's mean and sd (NA removed);
#   - married, black, smsa: 0/1 draws with probability equal to the column mean;
#   - state, qob, yob, region: resampled with replacement from the observed
#     values (no NA filtering is applied to these columns).
# The draws depend on the order of the calls below after set.seed().
set.seed(123) # For reproducibility
simulation <- data.frame(
  ageq = rnorm(n_sim, mean(P1t_1_$ageq, na.rm=TRUE), sd(P1t_1_$ageq, na.rm=TRUE)),
  edu = rnorm(n_sim, mean(P1t_1_$edu, na.rm=TRUE), sd(P1t_1_$edu, na.rm=TRUE)),
  married = rbinom(n_sim, 1, mean(P1t_1_$married, na.rm=TRUE)),
  state = sample(P1t_1_$state, n_sim, replace = TRUE),
  qob = sample(P1t_1_$qob, n_sim, replace = TRUE),
  black = rbinom(n_sim, 1, mean(P1t_1_$black, na.rm=TRUE)),
  smsa = rbinom(n_sim, 1, mean(P1t_1_$smsa, na.rm=TRUE)),
  yob = sample(P1t_1_$yob, n_sim, replace = TRUE),
  region = sample(P1t_1_$region, n_sim, replace = TRUE),
  wage = rnorm(n_sim, mean(P1t_1_$wage, na.rm=TRUE), sd(P1t_1_$wage, na.rm=TRUE)))
View(simulation)

# Drop simulated rows whose wage is not strictly positive before taking logs.
# This reassigns `data`, replacing the small data frame defined earlier.
data <- simulation[!(simulation$wage<= 0 ),]
#View(data)
# Log wage on the simulated data.
data$lw <- log(data$wage)
# `data2` is the original (non-simulated) data with the same log-wage column.
data2 <- P1t_1_
#View(data2)
data2$lw <- log(data2$wage)
# Estimates

# Model 1 (simulated data): log wage on black, smsa, married, yob, region and
# qob, with yob, region and qob entered without factor().
ols1 <- lm(lw ~ black + smsa + married + yob + region + qob, data = data)
summary(ols1)
## 
## Call:
## lm(formula = lw ~ black + smsa + married + yob + region + qob, 
##     data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6694 -0.2100  0.0969  0.3310  1.1101 
## 
## Coefficients:
##               Estimate Std. Error t value      Pr(>|t|)    
## (Intercept)  6.3776695  0.9883994   6.453 0.00000000011 ***
## black        0.0035528  0.0060338   0.589        0.5560    
## smsa         0.0126594  0.0060622   2.088        0.0368 *  
## married      0.0035480  0.0060244   0.589        0.5559    
## yob         -0.0002053  0.0005111  -0.402        0.6879    
## region      -0.0006310  0.0004527  -1.394        0.1634    
## qob          0.0007012  0.0011759   0.596        0.5510    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5128 on 152313 degrees of freedom
## Multiple R-squared:  4.956e-05,  Adjusted R-squared:  1.017e-05 
## F-statistic: 1.258 on 6 and 152313 DF,  p-value: 0.2731
###### Linearity
# Residuals-vs-fitted plot (panel 1 of plot.lm) for model 1.
plot(ols1, 1)

# Residuals of model 1 and their mean.
reols1 <- resid(ols1)
mean(reols1)
## [1] -0.000000000000000003951084
###### Independence
# Durbin-Watson test on model 1.

dwtest(ols1)
## 
##  Durbin-Watson test
## 
## data:  ols1
## DW = 1.9996, p-value = 0.472
## alternative hypothesis: true autocorrelation is greater than 0
###### Homoskedasticity assumption
# Breusch-Pagan test on model 1.

ols_test_breusch_pagan(ols1)
## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##              Data              
##  ------------------------------
##  Response : lw 
##  Variables: fitted values of lw 
## 
##           Test Summary            
##  ---------------------------------
##  DF            =    1 
##  Chi2          =    24.62947 
##  Prob > Chi2   =    0.000000694809
###### Collinearity assumption
# Tolerance/VIF and eigenvalue/condition-index diagnostics for model 1.

ols_coll_diag(ols1)
## Tolerance and Variance Inflation Factor
## ---------------------------------------
##   Variables Tolerance      VIF
## 1     black 0.9999381 1.000062
## 2      smsa 0.9999360 1.000064
## 3   married 0.9999370 1.000063
## 4       yob 0.9999808 1.000019
## 5    region 0.9999763 1.000024
## 6       qob 0.9999775 1.000023
## 
## 
## Eigenvalue and Condition Index
## ------------------------------
##        Eigenvalue Condition Index          intercept           black
## 1 5.5372886511330        1.000000 0.0000000565848697 0.0022150086699
## 2 0.9408606574249        2.425973 0.0000000035536710 0.9964034376383
## 3 0.3035545675652        4.271006 0.0000000952673356 0.0006129094277
## 4 0.1445343676180        6.189606 0.0000004273810870 0.0006096429742
## 5 0.0495027985807       10.576299 0.0000000008407888 0.0000007387505
## 6 0.0242580739532       15.108468 0.0000196338807576 0.0001580838157
## 7 0.0000008837251     2503.167928 0.9999797824914901 0.0000001787236
##             smsa        married                yob         region
## 1 0.001531730526 0.001551530526 0.0000000565865019 0.008123575141
## 2 0.000099074780 0.000101523242 0.0000000035538853 0.001121942662
## 3 0.003659646607 0.003592495368 0.0000000952639686 0.945575304709
## 4 0.026135519164 0.027169570204 0.0000004273546639 0.030645550102
## 5 0.493835869579 0.511248816314 0.0000000008421679 0.000006197033
## 6 0.474736055479 0.456330510495 0.0000196357781759 0.014524013231
## 7 0.000002103865 0.000005553852 0.9999797806206362 0.000003417122
##              qob
## 1 0.004873257609
## 2 0.000459269899
## 3 0.041678019290
## 4 0.910658108701
## 5 0.000039080175
## 6 0.042289867322
## 7 0.000002397004
###### Normality assumption
# Lilliefors (Kolmogorov-Smirnov) test on the residuals of model 1, followed
# by a residual Q-Q plot.

lillie.test(reols1)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  reols1
## D = 0.099484, p-value < 0.00000000000000022
ols_plot_resid_qq(ols1)

# Model 2 (simulated data): log wage on black, smsa and married, with yob and
# region entered as factor() dummies; qob is not included.
ols2 <- lm(
  lw ~ black + smsa + married + factor(yob) + factor(region),
  data = data
)
summary(ols2)
## 
## Call:
## lm(formula = lw ~ black + smsa + married + factor(yob) + factor(region), 
##     data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6709 -0.2100  0.0971  0.3311  1.1174 
## 
## Coefficients:
##                  Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)      5.980731   0.009472 631.438 <0.0000000000000002 ***
## black            0.003549   0.006034   0.588              0.5565    
## smsa             0.012729   0.006063   2.100              0.0358 *  
## married          0.003572   0.006025   0.593              0.5533    
## factor(yob)1931  0.005481   0.005104   1.074              0.2829    
## factor(yob)1932  0.005496   0.005873   0.936              0.3494    
## factor(yob)1933  0.006772   0.005877   1.152              0.2492    
## factor(yob)1934  0.002697   0.005890   0.458              0.6470    
## factor(yob)1935  0.002924   0.005365   0.545              0.5858    
## factor(yob)1936  0.007641   0.007176   1.065              0.2870    
## factor(yob)1937  0.005643   0.005881   0.959              0.3373    
## factor(yob)1938 -0.001306   0.005892  -0.222              0.8246    
## factor(region)1 -0.001600   0.005126  -0.312              0.7549    
## factor(region)3 -0.009757   0.004151  -2.351              0.0187 *  
## factor(region)5  0.001966   0.006612   0.297              0.7662    
## factor(region)6 -0.005925   0.004160  -1.424              0.1543    
## factor(region)7 -0.008296   0.005094  -1.629              0.1034    
## factor(region)8 -0.005479   0.004489  -1.220              0.2223    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5128 on 152302 degrees of freedom
## Multiple R-squared:  0.000115,   Adjusted R-squared:  3.399e-06 
## F-statistic:  1.03 on 17 and 152302 DF,  p-value: 0.4199
###### Linearity
# Residuals-vs-fitted plot (panel 1 of plot.lm) for model 2.
plot(ols2, 1)

# Residuals of model 2 and their mean.
reols2 <- resid(ols2)
mean(reols2)
## [1] -0.000000000000000009015737
###### Independence
# Durbin-Watson test on model 2.

dwtest(ols2)
## 
##  Durbin-Watson test
## 
## data:  ols2
## DW = 1.9995, p-value = 0.4639
## alternative hypothesis: true autocorrelation is greater than 0
###### Homoskedasticity assumption
# Breusch-Pagan test on model 2.

ols_test_breusch_pagan(ols2)
## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##              Data              
##  ------------------------------
##  Response : lw 
##  Variables: fitted values of lw 
## 
##                 Test Summary                 
##  --------------------------------------------
##  DF            =    1 
##  Chi2          =    75.62005 
##  Prob > Chi2   =    0.00000000000000000343852
###### Collinearity assumption
# Tolerance/VIF and eigenvalue/condition-index diagnostics for model 2.

ols_coll_diag(ols2)
## Tolerance and Variance Inflation Factor
## ---------------------------------------
##          Variables Tolerance      VIF
## 1            black 0.9998728 1.000127
## 2             smsa 0.9998124 1.000188
## 3          married 0.9998819 1.000118
## 4  factor(yob)1931 0.4166396 2.400156
## 5  factor(yob)1932 0.5526519 1.809457
## 6  factor(yob)1933 0.5531691 1.807765
## 7  factor(yob)1934 0.5551674 1.801259
## 8  factor(yob)1935 0.4677779 2.137767
## 9  factor(yob)1936 0.6983442 1.431959
## 10 factor(yob)1937 0.5538217 1.805635
## 11 factor(yob)1938 0.5554842 1.800231
## 12 factor(region)1 0.7417245 1.348209
## 13 factor(region)3 0.6207446 1.610968
## 14 factor(region)5 0.8424662 1.186991
## 15 factor(region)6 0.6220543 1.607577
## 16 factor(region)7 0.7385185 1.354062
## 17 factor(region)8 0.6691617 1.494407
## 
## 
## Eigenvalue and Condition Index
## ------------------------------
##    Eigenvalue Condition Index          intercept             black
## 1  4.74022522        1.000000 0.0008400300587019 0.003255773688131
## 2  1.01350337        2.162653 0.0000000153279455 0.000204840771264
## 3  1.00771076        2.168860 0.0000000024562133 0.000040013213440
## 4  1.00502298        2.171758 0.0000000002511476 0.000039237353240
## 5  1.00388214        2.172992 0.0000000146833471 0.000321657844413
## 6  1.00265360        2.174323 0.0000001777590864 0.002258165483869
## 7  1.00042753        2.176741 0.0000003275558286 0.006384494615557
## 8  1.00000137        2.177204 0.0000000015222155 0.000000001650735
## 9  0.99748623        2.179947 0.0000000216441656 0.000118418374585
## 10 0.99627632        2.181271 0.0000001821162782 0.002083127545213
## 11 0.99508734        2.182574 0.0000000980134729 0.001475975806442
## 12 0.99248134        2.185437 0.0000001698905167 0.002765557418469
## 13 0.98652569        2.192024 0.0000000122745427 0.000204882434409
## 14 0.93726088        2.248895 0.0000608661776200 0.980062935159352
## 15 0.17327177        5.230408 0.0023093602978781 0.000365118581367
## 16 0.08423613        7.501537 0.0056819050982504 0.000144819185217
## 17 0.04949132        9.786671 0.0000090458412195 0.000000657855008
## 18 0.01445600       18.108203 0.9910977690315700 0.000274323019288
##                     smsa                married factor(yob)1931 factor(yob)1932
## 1  0.0020946192042749306 0.00212158901679229510   0.00301650308  0.002278600726
## 2  0.0000000353271849312 0.00000002680940110251   0.00277172648  0.159310088623
## 3  0.0000000030663173925 0.00000000000003152622   0.00001842631  0.000009764602
## 4  0.0000000000006515996 0.00000000099320405804   0.08378782512  0.015096797546
## 5  0.0000000317754613322 0.00000005982515238745   0.01061273762  0.023938206552
## 6  0.0000002329095384135 0.00000064227463380999   0.00149329537  0.007434247234
## 7  0.0000011236104414942 0.00000130032530286667   0.03040417430  0.020010504610
## 8  0.0000000195932688980 0.00000000023459988115   0.03553827694  0.000137434785
## 9  0.0000000424885946063 0.00000010270841809298   0.00083286992  0.006639201381
## 10 0.0000003665803540798 0.00000055373788153599   0.00708011367  0.031900664428
## 11 0.0000001624203245834 0.00000034062911836758   0.08291030601  0.012834880869
## 12 0.0000002621831770193 0.00000067754782321571   0.00034574606  0.000040965240
## 13 0.0000001587180370618 0.00000002587522535097   0.00324198900  0.163797653338
## 14 0.0001585400997332322 0.00016037349988395379   0.00175241745  0.000589177689
## 15 0.0111452288221809259 0.01089585144863131419   0.04192201459  0.032487762006
## 16 0.0789434984620370483 0.08802609093857562128   0.59645273817  0.449179459151
## 17 0.5034190218621402702 0.50157165450801510787   0.00026437877  0.000127423311
## 18 0.4042366528762821321 0.39722070962730948729   0.09755446114  0.074187167908
##    factor(yob)1933 factor(yob)1934 factor(yob)1935 factor(yob)1936
## 1   0.002275043403    0.0022642416    0.0027355347   0.00153149130
## 2   0.002174411663    0.0669512080    0.0002678574   0.00753849786
## 3   0.126562071037    0.0040128600    0.0782890424   0.01804404891
## 4   0.012905405754    0.0692777015    0.0078817721   0.00001810771
## 5   0.000170611760    0.0015628655    0.0073174390   0.17410183331
## 6   0.000821776089    0.0181151820    0.0209382619   0.05111323782
## 7   0.095026170482    0.1072988340    0.0606615183   0.11759296711
## 8   0.073621319373    0.0003576873    0.0302518063   0.02112288759
## 9   0.000003133578    0.0236440296    0.0239182465   0.02926844744
## 10  0.000001010799    0.0068144925    0.0111876298   0.16625492570
## 11  0.013548223477    0.0727349710    0.0053430816   0.00369480248
## 12  0.114022009446    0.0045365118    0.0840241591   0.02751170921
## 13  0.001475318923    0.0693330310    0.0002430839   0.00691757099
## 14  0.001816448780    0.0002935222    0.0003403305   0.00275196727
## 15  0.032553579892    0.0323042237    0.0388163673   0.02303035562
## 16  0.449886932765    0.4464624855    0.5401570507   0.29912272131
## 17  0.000125453596    0.0001756262    0.0002162684   0.00006844804
## 18  0.073011079184    0.0738605267    0.0874105499   0.05031598034
##      factor(yob)1937 factor(yob)1938  factor(region)1 factor(region)3
## 1  0.002277904748160  0.002265648413 0.00312047248579 0.0047761626385
## 2  0.000000004039404  0.007247532678 0.03848566502510 0.0031739985498
## 3  0.001959760864289  0.003433759064 0.04889697744384 0.0165614253729
## 4  0.016596048606110  0.000096334459 0.10300016609316 0.0148912835287
## 5  0.029196820836313  0.034403651591 0.08433353122542 0.0859679147450
## 6  0.119121546142588  0.034933377239 0.01350332132857 0.0663338715363
## 7  0.039448429956198  0.020364180755 0.00221062906453 0.0002172563981
## 8  0.062009181181049  0.256213308776 0.00000002220023 0.0000007873309
## 9  0.122738080054407  0.046840479404 0.01266426126269 0.0724497664003
## 10 0.032757203833853  0.031996480194 0.11031347222247 0.0683488999534
## 11 0.017163218777671  0.000056338478 0.09568563128167 0.0222180514431
## 12 0.001921088244011  0.002513003534 0.03913031149624 0.0140042757802
## 13 0.000004627675753  0.007077545916 0.03857565877973 0.0031458948153
## 14 0.000004145848807  0.000003707323 0.00040591522557 0.0032581182334
## 15 0.033461636661918  0.031949147353 0.37887448039598 0.5787914115702
## 16 0.448066578376753  0.448101137338 0.01059931578283 0.0157641397507
## 17 0.000112678643689  0.000166190443 0.00001547107432 0.0000078998348
## 18 0.073161045509026  0.072338177043 0.02018469761186 0.0300888421182
##     factor(region)5 factor(region)6   factor(region)7  factor(region)8
## 1  0.00188044593682 0.0047505835316 0.003158467602421 0.00406551179156
## 2  0.01759853674585 0.0045211010120 0.239715646156465 0.02105487016854
## 3  0.20366581521349 0.0577522520870 0.000329353802804 0.00977550920444
## 4  0.03755383916274 0.1213988311736 0.001485005761283 0.01048222489320
## 5  0.10191881557130 0.0008607367478 0.036365502177985 0.01169667461255
## 6  0.01255588760509 0.0007353469673 0.011411546415395 0.17642250690527
## 7  0.00004342565943 0.0000077287812 0.000026004177560 0.00115170666008
## 8  0.00000004219971 0.0000001258475 0.000000007318915 0.00000005436493
## 9  0.00838240559130 0.0008197200664 0.013264024109953 0.16911695573897
## 10 0.08414479275173 0.0023269787004 0.032326653528334 0.01848059259426
## 11 0.06092213234696 0.1090888944935 0.000283737659074 0.01087648373353
## 12 0.20494156236779 0.0703725295946 0.000003363732224 0.01086746477078
## 13 0.01908507883273 0.0046094217305 0.246823714628851 0.02202332362754
## 14 0.00106472365382 0.0006694028117 0.000000447818558 0.00006663079871
## 15 0.22815759622390 0.5762891788358 0.383467943951233 0.49270822014404
## 16 0.00580997868436 0.0155727000378 0.010285349364563 0.01465095333693
## 17 0.00000084480522 0.0000006013906 0.000021135351748 0.00000328683870
## 18 0.01227407664776 0.0302238661906 0.021032096442635 0.02655702981598
###### Normality assumption
# Lilliefors (Kolmogorov-Smirnov) test on the residuals of model 2, followed
# by a residual Q-Q plot.

lillie.test(reols2)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  reols2
## D = 0.099501, p-value < 0.00000000000000022
ols_plot_resid_qq(ols2)

# Using the original (non-simulated) data

# Model 3: same specification as model 1, fitted on `data2`.
ols3 <- lm(lw ~ black + smsa + married + yob + region + qob, data = data2)
summary(ols3)
## 
## Call:
## lm(formula = lw ~ black + smsa + married + yob + region + qob, 
##     data = data2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.91033 -0.16396 -0.01509  0.14973  0.74810 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.09292   76.71720   0.679    0.509
## black       -0.43452    0.44376  -0.979    0.345
## smsa        -0.36533    0.49732  -0.735    0.476
## married     -0.64428    0.47677  -1.351    0.200
## yob         -0.02309    0.03962  -0.583    0.570
## region      -0.03878    0.03375  -1.149    0.271
## qob         -0.12406    0.09704  -1.278    0.223
## 
## Residual standard error: 0.4288 on 13 degrees of freedom
## Multiple R-squared:  0.291,  Adjusted R-squared:  -0.03622 
## F-statistic: 0.8893 on 6 and 13 DF,  p-value: 0.53
###### Linearity
# Residuals-vs-fitted plot (panel 1 of plot.lm) for model 3.
plot(ols3, 1)

# Residuals of model 3 and their mean.
reols3 <- resid(ols3)
mean(reols3)
## [1] 0.000000000000000006251239
###### Independence
# Durbin-Watson test on model 3.

dwtest(ols3)
## 
##  Durbin-Watson test
## 
## data:  ols3
## DW = 2.8836, p-value = 0.9804
## alternative hypothesis: true autocorrelation is greater than 0
###### Homoskedasticity assumption
# Breusch-Pagan test on model 3.

ols_test_breusch_pagan(ols3)
## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##              Data              
##  ------------------------------
##  Response : lw 
##  Variables: fitted values of lw 
## 
##         Test Summary         
##  ----------------------------
##  DF            =    1 
##  Chi2          =    0.2461636 
##  Prob > Chi2   =    0.6197895
###### Collinearity assumption
# Tolerance/VIF and eigenvalue/condition-index diagnostics for model 3.

ols_coll_diag(ols3)
## Tolerance and Variance Inflation Factor
## ---------------------------------------
##   Variables Tolerance      VIF
## 1     black 0.9826738 1.017632
## 2      smsa 0.7824206 1.278085
## 3   married 0.8513163 1.174651
## 4       yob 0.8833963 1.131995
## 5    region 0.9551500 1.046956
## 6       qob 0.7825009 1.277954
## 
## 
## Eigenvalue and Condition Index
## ------------------------------
##       Eigenvalue Condition Index          intercept        black           smsa
## 1 5.510175419448        1.000000 0.0000000507351929 0.0020279668 0.001209275404
## 2 0.951925533965        2.405920 0.0000000013468709 0.9598666981 0.000005638924
## 3 0.297711818116        4.302141 0.0000001204973800 0.0181705455 0.006547887560
## 4 0.174208419517        5.624037 0.0000000900113684 0.0184751767 0.025414152852
## 5 0.050962618136       10.398168 0.0000000003857804 0.0002943934 0.358460169051
## 6 0.015015408892       19.156410 0.0000297999124711 0.0000607728 0.519963202679
## 7 0.000000781926     2654.604932 0.9999699371109362 0.0011044467 0.088399673530
##         married                yob      region          qob
## 1 0.00132903334 0.0000000508665905 0.007864183 0.0037680086
## 2 0.00001090064 0.0000000013462188 0.004391951 0.0008957469
## 3 0.00165056657 0.0000001208817100 0.922678502 0.0184855192
## 4 0.02630451152 0.0000000898872863 0.002043443 0.5832145243
## 5 0.44015417661 0.0000000008991584 0.037003443 0.0005006499
## 6 0.51460143797 0.0000300148110073 0.016344781 0.3928825587
## 7 0.01594937335 0.9999697213080286 0.009673699 0.0002529925
###### Normality assumption
# Lilliefors (Kolmogorov-Smirnov) test on the residuals of model 3, followed
# by a residual Q-Q plot.

lillie.test(reols3)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  reols3
## D = 0.15, p-value = 0.2805
ols_plot_resid_qq(ols3)

# Model 4: same specification as model 2 (factor() dummies for yob and
# region), fitted on the original data `data2`.
ols4 <- lm(
  lw ~ black + smsa + married + factor(yob) + factor(region),
  data = data2
)
summary(ols4)
## 
## Call:
## lm(formula = lw ~ black + smsa + married + factor(yob) + factor(region), 
##     data = data2)
## 
## Residuals:
##                        1                        2                        3 
## -0.000000000000000136710 -0.292325027362902722938  0.391495005367982262801 
##                        4                        5                        6 
##  0.000000000000000186653  0.000000000000000054814  0.179291009870552631167 
##                        7                        8                        9 
##  0.292325027362902889472 -0.179291009870552742189 -0.000000000000000090902 
##                       10                       11                       12 
## -0.000000000000000118658 -0.000000000000000049269  0.000000000000000075631 
##                       13                       14                       15 
##  0.000000000000000020120  0.000000000000000006242 -0.391495005367982262801 
##                       16                       17                       18 
##  0.000000000000000030528 -0.000000000000000011105 -0.000000000000000007636 
##                       19                       20 
## -0.000000000000000090902  0.000000000000000131142 
## 
## Coefficients: (1 not defined because of singularities)
##                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       4.6446     1.5900   2.921   0.0614 .
## black            -1.1201     0.9966  -1.124   0.3429  
## smsa              0.6719     0.9966   0.674   0.5485  
## married          -0.3380     0.6010  -0.562   0.6131  
## factor(yob)1931   1.3231     0.7360   1.798   0.1701  
## factor(yob)1932   0.8769     0.9502   0.923   0.4242  
## factor(yob)1933   1.3301     0.7360   1.807   0.1685  
## factor(yob)1934   1.4581     0.7950   1.834   0.1640  
## factor(yob)1935   1.0204     0.5204   1.961   0.1448  
## factor(yob)1936   0.9800     0.7950   1.233   0.3055  
## factor(yob)1937   1.3696     0.9502   1.441   0.2451  
## factor(yob)1938   0.3487     0.7950   0.439   0.6906  
## factor(region)1  -0.3003     0.5204  -0.577   0.6043  
## factor(region)3   0.3186     0.5204   0.612   0.5837  
## factor(region)5   0.5473     0.6010   0.911   0.4296  
## factor(region)6   0.3251     0.6010   0.541   0.6261  
## factor(region)7  -0.2695     0.6010  -0.448   0.6843  
## factor(region)8       NA         NA      NA       NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4249 on 3 degrees of freedom
## Multiple R-squared:  0.8393, Adjusted R-squared:  -0.01785 
## F-statistic: 0.9792 on 16 and 3 DF,  p-value: 0.5907
###### Linearity
# Residuals-vs-fitted plot (panel 1 of plot.lm) for model 4.
plot(ols4, 1)

# Residuals of model 4 and their mean.
reols4 <- resid(ols4)
mean(reols4)
## [1] 0.000000000000000002772935
###### Independence
# Durbin-Watson test on model 4.
# NOTE(review): the statistic below is above 2 while the p-value for the
# "greater than 0" alternative is reported as essentially zero; with only 3
# residual degrees of freedom this result looks unreliable — verify before
# drawing conclusions from it.

dwtest(ols4)
## 
##  Durbin-Watson test
## 
## data:  ols4
## DW = 2.4225, p-value < 0.00000000000000022
## alternative hypothesis: true autocorrelation is greater than 0
###### Homoskedasticity assumption
# Breusch-Pagan test on model 4.

ols_test_breusch_pagan(ols4)
## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##              Data              
##  ------------------------------
##  Response : lw 
##  Variables: fitted values of lw 
## 
##         Test Summary          
##  -----------------------------
##  DF            =    1 
##  Chi2          =    0.03534657 
##  Prob > Chi2   =    0.8508713
###### Collinearity assumption
# NOTE(review): no collinearity diagnostic is run for model 4 in this section;
# the model summary reports one coefficient as not defined because of
# singularities, so the design matrix is rank-deficient.

 
###### Normality assumption
# Lilliefors (Kolmogorov-Smirnov) test on the residuals of model 4, followed
# by a residual Q-Q plot.

lillie.test(reols4)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  reols4
## D = 0.35, p-value = 0.000000566
ols_plot_resid_qq(ols4)

##### Interpretación

Para el modelo 1 se cumple con el supuesto de linealidad. Con el test de Durbin-Watson no se rechaza la hipótesis nula (p = 0.472), lo que permite afirmar que no hay autocorrelación serial en los residuos. Sin embargo, en el supuesto de homocedasticidad se rechaza la hipótesis nula (Breusch-Pagan, p < 0.001), por lo que se puede afirmar que hay heterocedasticidad, la cual se puede solucionar por medio de una estimación por mínimos cuadrados ponderados. Los residuos no siguen una distribución normal según el test de Lilliefors (Kolmogorov-Smirnov), porque se rechaza la hipótesis nula (la muestra proviene de una distribución normal); en este caso lo mejor es dar solución por medio de los errores robustos.

Para el modelo 2 se cumple con el supuesto de linealidad. Con el test de Durbin-Watson no se rechaza la hipótesis nula (p = 0.4639), lo que permite afirmar que no hay autocorrelación serial en los residuos. En el supuesto de homocedasticidad se rechaza la hipótesis nula (Breusch-Pagan, Chi2 = 75.62, p < 0.001), por lo que hay heterocedasticidad, y los residuos no siguen una distribución normal según el test de Lilliefors (Kolmogorov-Smirnov), porque se rechaza la hipótesis nula (D = 0.0995, p < 2.2e-16). Al igual que en el modelo 1, se puede dar solución por medio de mínimos cuadrados ponderados y errores robustos.

Para el modelo 3 se cumple con el supuesto de linealidad. Con el test de Durbin-Watson no se rechaza la hipótesis nula (p = 0.9804), lo que permite afirmar que no hay autocorrelación serial en los residuos. En el supuesto de homocedasticidad no se rechaza la hipótesis nula (Breusch-Pagan, p = 0.6198), por lo que se puede afirmar que hay homocedasticidad, y los residuos siguen una distribución normal según el test de Lilliefors (Kolmogorov-Smirnov), porque no se rechaza la hipótesis nula de que la muestra proviene de una distribución normal (p = 0.2805).
Para el modelo 4 se cumple con el supuesto de linealidad. Con el test de Durbin-Watson se rechaza la hipótesis nula, lo que permite afirmar que hay autocorrelación serial en los residuos, y lo mejor es dar solución por medio de una estimación Prais-Winsten. Sin embargo, en el supuesto de homocedasticidad no se rechaza la hipótesis nula (p = 0.8509) y se puede afirmar que hay homocedasticidad. Los residuos no siguen una distribución normal según el test de Lilliefors (Kolmogorov-Smirnov), porque se rechaza la hipótesis nula (la muestra proviene de una distribución normal); en este caso lo mejor es dar solución por medio de los errores robustos. En general, en los modelos las variables no fueron estadísticamente significativas; las excepciones son la variable smsa en el modelo 1 y la variable smsa y la región 3 en el modelo 2, significativas al 5%. En los modelos 1, 2 y 3 no se presentó multicolinealidad, lo que se puede evidenciar por medio de los VIF; en el modelo 4, en cambio, un coeficiente (región 8) no pudo estimarse por singularidad, lo que indica colinealidad perfecta entre regresores.

# IV model 1 (simulated data): log wage on edu plus the model-1 controls.
# The instrument set (right of `|`) is factor(qob) together with the controls.
# NOTE(review): qob appears both as a regressor and, through factor(qob), in
# the instrument set — confirm this is the intended specification.
iv <- ivreg(
  lw ~ edu + black + smsa + married + yob + region + qob |
    factor(qob) + black + smsa + married + yob + region + qob,
  data = data
)
summary(iv)
## 
## Call:
## ivreg(formula = lw ~ edu + black + smsa + married + yob + region + 
##     qob | factor(qob) + black + smsa + married + yob + region + 
##     qob, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.2254 -0.5293  0.0382  0.5788  3.4085 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  1.3701130  5.9164503   0.232    0.817
## edu          0.2760814  0.3129772   0.882    0.378
## black       -0.0058203  0.0147124  -0.396    0.692
## smsa        -0.0035758  0.0210539  -0.170    0.865
## married      0.0012818  0.0104799   0.122    0.903
## yob          0.0006297  0.0012804   0.492    0.623
## region      -0.0004480  0.0007912  -0.566    0.571
## qob          0.0005395  0.0019917   0.271    0.786
## 
## Residual standard error: 0.8649 on 152312 degrees of freedom
## Multiple R-Squared: -1.844,  Adjusted R-squared: -1.844 
## Wald test: 0.4903 on 7 and 152312 DF,  p-value: 0.8424
# Instrument checks for IV model 1: correlation of the IV residuals with qob,
# and correlation of edu with qob.
# NOTE(review): qob is itself a regressor in `iv`, so a near-zero correlation
# between the residuals and qob is expected by construction and does not by
# itself support instrument exogeneity — confirm the intended check.
u <- resid(iv)
cor(u, data$qob)
## [1] 0.000000000000002586146
cor(data$edu, data$qob)
## [1] 0.0002492395
# IV model 2 (simulated data): log wage on edu plus the model-2 controls
# (factor() dummies for yob and region). The instrument set (right of `|`) is
# factor(qob * yob) together with the same controls.
iv2 <- ivreg(
  lw ~ edu + black + smsa + married + factor(yob) + factor(region) |
    factor(qob * yob) + black + smsa + married + factor(yob) + factor(region),
  data = data
)
summary(iv2)
## 
## Call:
## ivreg(formula = lw ~ edu + black + smsa + married + factor(yob) + 
##     factor(region) | factor(qob * yob) + black + smsa + married + 
##     factor(yob) + factor(region), data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7.72032 -0.21187  0.09573  0.33245  1.14849 
## 
## Coefficients:
##                   Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)      6.1901027  0.3854538  16.059 <0.0000000000000002 ***
## edu             -0.0170518  0.0313829  -0.543              0.5869    
## black            0.0041257  0.0061476   0.671              0.5022    
## smsa             0.0137290  0.0063561   2.160              0.0308 *  
## married          0.0037073  0.0060509   0.613              0.5401    
## factor(yob)1931  0.0054135  0.0051233   1.057              0.2907    
## factor(yob)1932  0.0048622  0.0060082   0.809              0.4184    
## factor(yob)1933  0.0067518  0.0058973   1.145              0.2523    
## factor(yob)1934  0.0023682  0.0059419   0.399              0.6902    
## factor(yob)1935  0.0026265  0.0054111   0.485              0.6274    
## factor(yob)1936  0.0080008  0.0072315   1.106              0.2686    
## factor(yob)1937  0.0053433  0.0059271   0.902              0.3673    
## factor(yob)1938 -0.0021240  0.0061017  -0.348              0.7278    
## factor(region)1 -0.0004684  0.0055498  -0.084              0.9327    
## factor(region)3 -0.0089536  0.0044206  -2.025              0.0428 *  
## factor(region)5  0.0027201  0.0067783   0.401              0.6882    
## factor(region)6 -0.0057652  0.0041849  -1.378              0.1683    
## factor(region)7 -0.0085816  0.0051384  -1.670              0.0949 .  
## factor(region)8 -0.0047993  0.0046754  -1.026              0.3047    
## 
## Diagnostic tests:
##                     df1    df2 statistic     p-value    
## Weak instruments      3 152275     1.616       0.183    
## Wu-Hausman            1 152300     0.296       0.586    
## Sargan                2     NA    30.976 0.000000188 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5146 on 152301 degrees of freedom
## Multiple R-Squared: -0.006894,   Adjusted R-squared: -0.007013 
## Wald test: 0.9828 on 18 and 152301 DF,  p-value: 0.4762
# Instrument checks for the second IV model (`iv2`)
# Residuals of `iv2`.
u2 <- resid(iv2)
# Correlate the `iv2` residuals with the product qob * yob. This call must use
# `u2`: passing `u` (the residuals of the first model, `iv`) would test the
# wrong model against this instrument.
# NOTE: the value recorded on the next line was produced with `u`; re-run the
# script to refresh it.
cor(u2, data$qob * data$yob)
## [1] 0.000003865908
# Sample correlation between `edu` and the product qob * yob.
cor(data$edu, data$qob * data$yob)
## [1] 0.0002395593

Interpretación

En la estimación 1 de variables instrumentales se puede afirmar que el instrumento qob es débil, puesto que su correlación con la variable educación es muy cercana a 0. Para ser un buen instrumento debe cumplir dos supuestos, cov(z,u)=0 y cov(x,z)≠0; sin embargo, solo cumple con no tener correlación con los residuos. En la estimación 2 de variables instrumentales, el instrumento (i.qob*i.yob) tampoco es un buen instrumento, ya que sufre el mismo problema que en la estimación 1: la correlación entre el instrumento y la variable instrumentada es casi 0, por lo que cumple únicamente con el supuesto de que los errores no se correlacionan con el instrumento.

# Tobit
# Tobit regression of `lw` on black, smsa, married, yob, region and qob,
# fitted only on the rows where state == 36. No censoring limits are passed,
# so the defaults of tobit() apply.
# NOTE(review): the fitted model is stored under the name `tobit`, the same
# name as the fitting function. The model-comparison call later in the file
# refers to this object by that name, so the name is kept here.
tobit <- tobit(
  lw ~ black + smsa + married + yob + region + qob,
  data = data,
  subset = (state == 36)
)
# Print the coefficient table for the fitted model.
summary(tobit)
## 
## Call:
## tobit(formula = lw ~ black + smsa + married + yob + region + 
##     qob, subset = (state == 36), data = data)
## 
## Observations:
##          Total  Left-censored     Uncensored Right-censored 
##          14881              2          14879              0 
## 
## Coefficients:
##               Estimate Std. Error  z value            Pr(>|z|)    
## (Intercept)  7.8485081  3.2160451    2.440              0.0147 *  
## black        0.0301383  0.0203386    1.482              0.1384    
## smsa         0.0108805  0.0199101    0.546              0.5847    
## married     -0.0121696  0.0202434   -0.601              0.5477    
## yob         -0.0009600  0.0016632   -0.577              0.5638    
## region       0.0004162  0.0014720    0.283              0.7774    
## qob         -0.0029972  0.0038274   -0.783              0.4336    
## Log(scale)  -0.6506790  0.0057973 -112.238 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Scale: 0.5217 
## 
## Gaussian distribution
## Number of Newton-Raphson Iterations: 2 
## Log-likelihood: -1.144e+04 on 8 Df
## Wald-statistic: 3.904 on 6 Df, p-value: 0.68968
# Model comparison
# Side-by-side table of every model fitted in this script (four OLS fits, two
# IV fits and the Tobit fit), with significance stars.
# The caption is passed as `title`. The misspelling `tittle` is not a
# modelsummary() argument, so the caption was never applied.
# NOTE: the table recorded below was rendered without the caption; re-run the
# script to refresh it.
modelsummary(
  list(
    "ols1" = ols1, "ols2" = ols2, "ols3" = ols3, "ols4" = ols4,
    "iv" = iv, "iv2" = iv2, "tobit" = tobit
  ),
  title = "Comparación modelos",
  stars = TRUE
)
tinytable_cc6n1s9s41mv7o7zp83w
ols1 ols2 ols3 ols4 iv iv2 tobit
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 6.378*** 5.981*** 52.093 4.645+ 1.370 6.190*** 7.849*
(0.988) (0.009) (76.717) (1.590) (5.916) (0.385) (3.216)
black 0.004 0.004 -0.435 -1.120 -0.006 0.004 0.030
(0.006) (0.006) (0.444) (0.997) (0.015) (0.006) (0.020)
smsa 0.013* 0.013* -0.365 0.672 -0.004 0.014* 0.011
(0.006) (0.006) (0.497) (0.997) (0.021) (0.006) (0.020)
married 0.004 0.004 -0.644 -0.338 0.001 0.004 -0.012
(0.006) (0.006) (0.477) (0.601) (0.010) (0.006) (0.020)
yob 0.000 -0.023 0.001 -0.001
(0.001) (0.040) (0.001) (0.002)
region -0.001 -0.039 0.000 0.000
(0.000) (0.034) (0.001) (0.001)
qob 0.001 -0.124 0.001 -0.003
(0.001) (0.097) (0.002) (0.004)
factor(yob)1931 0.005 1.323 0.005
(0.005) (0.736) (0.005)
factor(yob)1932 0.005 0.877 0.005
(0.006) (0.950) (0.006)
factor(yob)1933 0.007 1.330 0.007
(0.006) (0.736) (0.006)
factor(yob)1934 0.003 1.458 0.002
(0.006) (0.795) (0.006)
factor(yob)1935 0.003 1.020 0.003
(0.005) (0.520) (0.005)
factor(yob)1936 0.008 0.980 0.008
(0.007) (0.795) (0.007)
factor(yob)1937 0.006 1.370 0.005
(0.006) (0.950) (0.006)
factor(yob)1938 -0.001 0.349 -0.002
(0.006) (0.795) (0.006)
factor(region)1 -0.002 -0.300 0.000
(0.005) (0.520) (0.006)
factor(region)3 -0.010* 0.319 -0.009*
(0.004) (0.520) (0.004)
factor(region)5 0.002 0.547 0.003
(0.007) (0.601) (0.007)
factor(region)6 -0.006 0.325 -0.006
(0.004) (0.601) (0.004)
factor(region)7 -0.008 -0.269 -0.009+
(0.005) (0.601) (0.005)
factor(region)8 -0.005 -0.005
(0.004) (0.005)
edu 0.276 -0.017
(0.313) (0.031)
Num.Obs. 152320 152320 20 20 152320 152320 14881
R2 0.000 0.000 0.291 0.839 -1.844 -0.007
R2 Adj. 0.000 0.000 -0.036 -0.018 -1.844 -0.007
AIC 228835.1 228847.1 30.3 20.6 388055.8 229913.1 22889.4
BIC 228914.6 229035.9 38.2 38.5 388145.2 230111.7 22950.2
Log.Lik. -114409.545 -114404.560 -7.134 7.708
F 1.258 1.030 0.889
RMSE 0.51 0.51 0.35 0.16 0.86 0.51 0.52
Interpretación

Respecto al modelo Tobit, buscaba evaluar si, por el hecho de que Nueva York es la capital del mundo, se podía encontrar evidencia de discriminación según los datos de la muestra. No se pudo obtener evidencia suficiente para afirmar o rechazar esta hipótesis, ya que el coeficiente de black, aunque es positivo, no es estadísticamente significativo; por lo tanto, no hay evidencia estadística suficiente para sustentar la hipótesis inicial.

Para la comparación de los modelos me decantaría por la estimación ols2 de mínimos cuadrados ordinarios. Descarto completamente las estimaciones por iv, puesto que los test demuestran que no se están utilizando buenos instrumentos. El modelo tobit también se descarta, puesto que sus coeficientes no son estadísticamente significativos y debería estudiarse con mayor profundidad la muestra para recomendar una censura específica. Los modelos ols3 y ols4 son descartados inmediatamente por no tener significancia en ninguna de sus variables explicativas, además de recopilar únicamente 20 datos para el estudio, lo que a mi consideración no representa una muestra significativa para todos los efectos que se desea capturar de las variables explicativas. El modelo 1 (ols1) lo descarto, ya que la única variable explicativa significativa es smsa y sufre de varios problemas, como heterocedasticidad y una distribución no normal de los residuos. El modelo ols2, a pesar de que solo cuenta con la variable smsa significativa, incluye también los efectos categóricos de las variables yob y region, lo que permite estudiar de manera detallada la incidencia de cada región en el salario, destacando la región 3 con un coeficiente significativo al 5%. Finalmente, este modelo cumple con los supuestos de ols y presenta uno de los estadísticos AIC y BIC más bajos.