# Punto 1
#Punto 2
# Small data set for the first exercise: q is the response, a and b are the
# regressors, and c and d are used as instruments in the IV fit further below.
# NOTE(review): the values resemble the first rows of the Longley data with
# the decimal point dropped -- confirm the source.
q <- c(60323, 61122, 60171, 61187, 63221, 63639, 64989, 63761)
a <- c(830, 885, 882, 895, 962, 981, 990, 1000)
b <- c(234289, 259426, 258054, 284599, 328975, 346999, 365385, 363112)
c <- c(2356, 2325, 3682, 3351, 2099, 1932, 1870, 3578)
d <- c(1590, 1456, 1616, 1650, 3099, 3594, 3547, 3350)
data <- data.frame(q = q, a = a, b = b, c = c, d = d)

# Ordinary least squares of q on a and b; the fit is kept in `olsi` because
# later steps (anova, predict, model comparison) reuse it.
olsi <- lm(q ~ a + b, data = data)
summary(olsi)
##
## Call:
## lm(formula = q ~ a + b, data = data)
##
## Residuals:
## 1 2 3 4 5 6 7 8
## 171.91 548.99 -378.95 -571.66 208.02 -22.47 490.26 -446.11
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61591.27049 11410.69431 5.398 0.00295 **
## a -16.92866 20.10015 -0.842 0.43808
## b 0.05382 0.02421 2.223 0.07682 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 506.9 on 5 degrees of freedom
## Multiple R-squared: 0.9443, Adjusted R-squared: 0.9221
## F-statistic: 42.41 on 2 and 5 DF, p-value: 0.0007312
# Sequential ANOVA table for olsi; its Residuals row supplies the residual
# mean square used below.
anova(olsi)
## Analysis of Variance Table
##
## Response: q
## Df Sum Sq Mean Sq F value Pr(>F)
## a 1 20519754 20519754 79.8753 0.0002922 ***
## b 1 1269765 1269765 4.9427 0.0768170 .
## Residuals 5 1284487 256897
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residual variance, copied from the "Mean Sq" entry of the Residuals row in
# the ANOVA table printed above.
var2 <- 256897
# Residual standard deviation: square root of the residual variance. It is
# derived from var2 so the constant is written only once.
desviación_estandar <- sqrt(var2)
desviación_estandar
## [1] 506.8501
# Fitted values of q for the eight observations used to estimate olsi
predict(olsi)
## 1 2 3 4 5 6 7 8
## 60151.09 60573.01 60549.95 61758.66 63012.98 63661.47 64498.74 64207.11
# Instrumental-variables fit of the same equation: a and b appear only on the
# left of `|`, so they are instrumented by c and d.
ivi <- ivreg(q ~ a + b | c + d, data = data)
summary(ivi)
##
## Call:
## ivreg(formula = q ~ a + b | c + d, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2592.7 -1680.0 353.4 1370.2 2783.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 174177.1259 261796.7100 0.665 0.535
## a -216.1424 463.4645 -0.466 0.661
## b 0.2908 0.5522 0.527 0.621
##
## Diagnostic tests:
## df1 df2 statistic p-value
## Weak instruments (a) 2 5 27.394 0.00202 **
## Weak instruments (b) 2 5 36.789 0.00102 **
## Wu-Hausman 2 3 6.351 0.08351 .
## Sargan 0 NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2303 on 5 degrees of freedom
## Multiple R-Squared: -0.1493, Adjusted R-squared: -0.6091
## Wald test: 1.986 on 2 and 5 DF, p-value: 0.2319
# Side-by-side table of the OLS and IV fits
modelsummary(list(olsi = olsi, ivi = ivi))
| | olsi | ivi |
|---|---|---|
| (Intercept) | 61591.270 | 174177.126 |
| | (11410.694) | (261796.710) |
| a | -16.929 | -216.142 |
| | (20.100) | (463.465) |
| b | 0.054 | 0.291 |
| | (0.024) | (0.552) |
| Num.Obs. | 8 | 8 |
| R2 | 0.944 | -0.149 |
| R2 Adj. | 0.922 | -0.609 |
| AIC | 126.6 | 150.8 |
| BIC | 126.9 | 151.1 |
| Log.Lik. | -59.297 | |
| F | 42.409 | |
| RMSE | 400.70 | 1820.71 |
Para el modelo inicial, estimado por mínimos cuadrados ordinarios, el Ba es -16.929 y el Bb es 0.054. Los valores predichos de la variable dependiente q se hallaron utilizando el comando predict y se encuentran en los resultados presentados en la consola. Finalmente, la desviación estándar de este modelo es de 506.8501. Para el modelo por variables instrumentales, utilizando c y d como instrumentos de a y b respectivamente, se obtuvo que el Ba es -216.142 y el Bb es 0.291.
# Load the source data
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file exists. Consider a project-relative path.
P1t_1_ <- read_excel("C:/Users/LABSIS/Downloads/P1t(1).xlsx")
# The data viewer is only useful in an interactive session, so it is skipped
# when the script is run or knitted non-interactively.
if (interactive()) {
  View(P1t_1_)
}
# Number of simulated observations
n_sim <- 153247

# Simulation of the variables. Every column is drawn independently of the
# others, using only the marginal information of the matching column in
# P1t_1_.
set.seed(123) # for reproducibility

# Helpers, one per sampling scheme. They are called below in the same order
# as the original inline expressions so the random stream is unchanged.
# Normal draws with the sample mean and sd of x.
sim_normal <- function(x) {
  rnorm(n_sim, mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
}
# Bernoulli draws with success probability equal to the sample mean of x.
sim_binary <- function(x) {
  rbinom(n_sim, 1, mean(x, na.rm = TRUE))
}
# Resampling with replacement from the observed values of x.
sim_resample <- function(x) {
  sample(x, n_sim, replace = TRUE)
}

simulation <- data.frame(
  ageq = sim_normal(P1t_1_$ageq),
  edu = sim_normal(P1t_1_$edu),
  married = sim_binary(P1t_1_$married),
  state = sim_resample(P1t_1_$state),
  qob = sim_resample(P1t_1_$qob),
  black = sim_binary(P1t_1_$black),
  smsa = sim_binary(P1t_1_$smsa),
  yob = sim_resample(P1t_1_$yob),
  region = sim_resample(P1t_1_$region),
  wage = sim_normal(P1t_1_$wage)
)
# Only open the data viewer when someone is there to look at it.
if (interactive()) {
  View(simulation)
}

# Normal draws can be zero or negative; keep strictly positive wages so the
# logarithm below is defined.
data <- simulation[simulation$wage > 0, ]
data$lw <- log(data$wage)

# Original (non-simulated) sample with the same log-wage column.
data2 <- P1t_1_
data2$lw <- log(data2$wage)
# Estimates
# Model 1: log wage on the controls, with yob, region and qob entering as
# numeric regressors; fitted on the simulated sample.
ols1 <- lm(
  lw ~ black + smsa + married + yob + region + qob,
  data = data
)
summary(ols1)
##
## Call:
## lm(formula = lw ~ black + smsa + married + yob + region + qob,
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6694 -0.2100 0.0969 0.3310 1.1101
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.3776695 0.9883994 6.453 0.00000000011 ***
## black 0.0035528 0.0060338 0.589 0.5560
## smsa 0.0126594 0.0060622 2.088 0.0368 *
## married 0.0035480 0.0060244 0.589 0.5559
## yob -0.0002053 0.0005111 -0.402 0.6879
## region -0.0006310 0.0004527 -1.394 0.1634
## qob 0.0007012 0.0011759 0.596 0.5510
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5128 on 152313 degrees of freedom
## Multiple R-squared: 4.956e-05, Adjusted R-squared: 1.017e-05
## F-statistic: 1.258 on 6 and 152313 DF, p-value: 0.2731
###### Linearity
# Residuals-versus-fitted plot, then the mean of the residuals
plot(ols1, which = 1)
reols1 <- residuals(ols1)
mean(reols1)
## [1] -0.000000000000000003951084
###### Independence: Durbin-Watson test for autocorrelation in ols1
dwtest(ols1)
##
## Durbin-Watson test
##
## data: ols1
## DW = 1.9996, p-value = 0.472
## alternative hypothesis: true autocorrelation is greater than 0
###### Homoscedasticity assumption: Breusch-Pagan test on ols1
###### (H0: the error variance is constant)
ols_test_breusch_pagan(ols1)
##
## Breusch Pagan Test for Heteroskedasticity
## -----------------------------------------
## Ho: the variance is constant
## Ha: the variance is not constant
##
## Data
## ------------------------------
## Response : lw
## Variables: fitted values of lw
##
## Test Summary
## ---------------------------------
## DF = 1
## Chi2 = 24.62947
## Prob > Chi2 = 0.000000694809
###### Collinearity assumption: tolerance/VIF and condition indices for ols1
ols_coll_diag(ols1)
## Tolerance and Variance Inflation Factor
## ---------------------------------------
## Variables Tolerance VIF
## 1 black 0.9999381 1.000062
## 2 smsa 0.9999360 1.000064
## 3 married 0.9999370 1.000063
## 4 yob 0.9999808 1.000019
## 5 region 0.9999763 1.000024
## 6 qob 0.9999775 1.000023
##
##
## Eigenvalue and Condition Index
## ------------------------------
## Eigenvalue Condition Index intercept black
## 1 5.5372886511330 1.000000 0.0000000565848697 0.0022150086699
## 2 0.9408606574249 2.425973 0.0000000035536710 0.9964034376383
## 3 0.3035545675652 4.271006 0.0000000952673356 0.0006129094277
## 4 0.1445343676180 6.189606 0.0000004273810870 0.0006096429742
## 5 0.0495027985807 10.576299 0.0000000008407888 0.0000007387505
## 6 0.0242580739532 15.108468 0.0000196338807576 0.0001580838157
## 7 0.0000008837251 2503.167928 0.9999797824914901 0.0000001787236
## smsa married yob region
## 1 0.001531730526 0.001551530526 0.0000000565865019 0.008123575141
## 2 0.000099074780 0.000101523242 0.0000000035538853 0.001121942662
## 3 0.003659646607 0.003592495368 0.0000000952639686 0.945575304709
## 4 0.026135519164 0.027169570204 0.0000004273546639 0.030645550102
## 5 0.493835869579 0.511248816314 0.0000000008421679 0.000006197033
## 6 0.474736055479 0.456330510495 0.0000196357781759 0.014524013231
## 7 0.000002103865 0.000005553852 0.9999797806206362 0.000003417122
## qob
## 1 0.004873257609
## 2 0.000459269899
## 3 0.041678019290
## 4 0.910658108701
## 5 0.000039080175
## 6 0.042289867322
## 7 0.000002397004
###### Normality assumption: Lilliefors (Kolmogorov-Smirnov) test on the
###### ols1 residuals
lillie.test(reols1)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: reols1
## D = 0.099484, p-value < 0.00000000000000022
# Q-Q plot of the ols1 residuals
ols_plot_resid_qq(ols1)

# Model 2: same sample as model 1, but yob and region enter as factors and
# qob is dropped.
ols2 <- lm(
  lw ~ black + smsa + married + factor(yob) + factor(region),
  data = data
)
summary(ols2)
##
## Call:
## lm(formula = lw ~ black + smsa + married + factor(yob) + factor(region),
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6709 -0.2100 0.0971 0.3311 1.1174
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.980731 0.009472 631.438 <0.0000000000000002 ***
## black 0.003549 0.006034 0.588 0.5565
## smsa 0.012729 0.006063 2.100 0.0358 *
## married 0.003572 0.006025 0.593 0.5533
## factor(yob)1931 0.005481 0.005104 1.074 0.2829
## factor(yob)1932 0.005496 0.005873 0.936 0.3494
## factor(yob)1933 0.006772 0.005877 1.152 0.2492
## factor(yob)1934 0.002697 0.005890 0.458 0.6470
## factor(yob)1935 0.002924 0.005365 0.545 0.5858
## factor(yob)1936 0.007641 0.007176 1.065 0.2870
## factor(yob)1937 0.005643 0.005881 0.959 0.3373
## factor(yob)1938 -0.001306 0.005892 -0.222 0.8246
## factor(region)1 -0.001600 0.005126 -0.312 0.7549
## factor(region)3 -0.009757 0.004151 -2.351 0.0187 *
## factor(region)5 0.001966 0.006612 0.297 0.7662
## factor(region)6 -0.005925 0.004160 -1.424 0.1543
## factor(region)7 -0.008296 0.005094 -1.629 0.1034
## factor(region)8 -0.005479 0.004489 -1.220 0.2223
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5128 on 152302 degrees of freedom
## Multiple R-squared: 0.000115, Adjusted R-squared: 3.399e-06
## F-statistic: 1.03 on 17 and 152302 DF, p-value: 0.4199
###### Linearity
# Residuals-versus-fitted plot, then the mean of the residuals
plot(ols2, which = 1)
reols2 <- residuals(ols2)
mean(reols2)
## [1] -0.000000000000000009015737
###### Independence: Durbin-Watson test for autocorrelation in ols2
dwtest(ols2)
##
## Durbin-Watson test
##
## data: ols2
## DW = 1.9995, p-value = 0.4639
## alternative hypothesis: true autocorrelation is greater than 0
###### Homoscedasticity assumption: Breusch-Pagan test on ols2
###### (H0: the error variance is constant)
ols_test_breusch_pagan(ols2)
##
## Breusch Pagan Test for Heteroskedasticity
## -----------------------------------------
## Ho: the variance is constant
## Ha: the variance is not constant
##
## Data
## ------------------------------
## Response : lw
## Variables: fitted values of lw
##
## Test Summary
## --------------------------------------------
## DF = 1
## Chi2 = 75.62005
## Prob > Chi2 = 0.00000000000000000343852
###### Collinearity assumption: tolerance/VIF and condition indices for ols2
ols_coll_diag(ols2)
## Tolerance and Variance Inflation Factor
## ---------------------------------------
## Variables Tolerance VIF
## 1 black 0.9998728 1.000127
## 2 smsa 0.9998124 1.000188
## 3 married 0.9998819 1.000118
## 4 factor(yob)1931 0.4166396 2.400156
## 5 factor(yob)1932 0.5526519 1.809457
## 6 factor(yob)1933 0.5531691 1.807765
## 7 factor(yob)1934 0.5551674 1.801259
## 8 factor(yob)1935 0.4677779 2.137767
## 9 factor(yob)1936 0.6983442 1.431959
## 10 factor(yob)1937 0.5538217 1.805635
## 11 factor(yob)1938 0.5554842 1.800231
## 12 factor(region)1 0.7417245 1.348209
## 13 factor(region)3 0.6207446 1.610968
## 14 factor(region)5 0.8424662 1.186991
## 15 factor(region)6 0.6220543 1.607577
## 16 factor(region)7 0.7385185 1.354062
## 17 factor(region)8 0.6691617 1.494407
##
##
## Eigenvalue and Condition Index
## ------------------------------
## Eigenvalue Condition Index intercept black
## 1 4.74022522 1.000000 0.0008400300587019 0.003255773688131
## 2 1.01350337 2.162653 0.0000000153279455 0.000204840771264
## 3 1.00771076 2.168860 0.0000000024562133 0.000040013213440
## 4 1.00502298 2.171758 0.0000000002511476 0.000039237353240
## 5 1.00388214 2.172992 0.0000000146833471 0.000321657844413
## 6 1.00265360 2.174323 0.0000001777590864 0.002258165483869
## 7 1.00042753 2.176741 0.0000003275558286 0.006384494615557
## 8 1.00000137 2.177204 0.0000000015222155 0.000000001650735
## 9 0.99748623 2.179947 0.0000000216441656 0.000118418374585
## 10 0.99627632 2.181271 0.0000001821162782 0.002083127545213
## 11 0.99508734 2.182574 0.0000000980134729 0.001475975806442
## 12 0.99248134 2.185437 0.0000001698905167 0.002765557418469
## 13 0.98652569 2.192024 0.0000000122745427 0.000204882434409
## 14 0.93726088 2.248895 0.0000608661776200 0.980062935159352
## 15 0.17327177 5.230408 0.0023093602978781 0.000365118581367
## 16 0.08423613 7.501537 0.0056819050982504 0.000144819185217
## 17 0.04949132 9.786671 0.0000090458412195 0.000000657855008
## 18 0.01445600 18.108203 0.9910977690315700 0.000274323019288
## smsa married factor(yob)1931 factor(yob)1932
## 1 0.0020946192042749306 0.00212158901679229510 0.00301650308 0.002278600726
## 2 0.0000000353271849312 0.00000002680940110251 0.00277172648 0.159310088623
## 3 0.0000000030663173925 0.00000000000003152622 0.00001842631 0.000009764602
## 4 0.0000000000006515996 0.00000000099320405804 0.08378782512 0.015096797546
## 5 0.0000000317754613322 0.00000005982515238745 0.01061273762 0.023938206552
## 6 0.0000002329095384135 0.00000064227463380999 0.00149329537 0.007434247234
## 7 0.0000011236104414942 0.00000130032530286667 0.03040417430 0.020010504610
## 8 0.0000000195932688980 0.00000000023459988115 0.03553827694 0.000137434785
## 9 0.0000000424885946063 0.00000010270841809298 0.00083286992 0.006639201381
## 10 0.0000003665803540798 0.00000055373788153599 0.00708011367 0.031900664428
## 11 0.0000001624203245834 0.00000034062911836758 0.08291030601 0.012834880869
## 12 0.0000002621831770193 0.00000067754782321571 0.00034574606 0.000040965240
## 13 0.0000001587180370618 0.00000002587522535097 0.00324198900 0.163797653338
## 14 0.0001585400997332322 0.00016037349988395379 0.00175241745 0.000589177689
## 15 0.0111452288221809259 0.01089585144863131419 0.04192201459 0.032487762006
## 16 0.0789434984620370483 0.08802609093857562128 0.59645273817 0.449179459151
## 17 0.5034190218621402702 0.50157165450801510787 0.00026437877 0.000127423311
## 18 0.4042366528762821321 0.39722070962730948729 0.09755446114 0.074187167908
## factor(yob)1933 factor(yob)1934 factor(yob)1935 factor(yob)1936
## 1 0.002275043403 0.0022642416 0.0027355347 0.00153149130
## 2 0.002174411663 0.0669512080 0.0002678574 0.00753849786
## 3 0.126562071037 0.0040128600 0.0782890424 0.01804404891
## 4 0.012905405754 0.0692777015 0.0078817721 0.00001810771
## 5 0.000170611760 0.0015628655 0.0073174390 0.17410183331
## 6 0.000821776089 0.0181151820 0.0209382619 0.05111323782
## 7 0.095026170482 0.1072988340 0.0606615183 0.11759296711
## 8 0.073621319373 0.0003576873 0.0302518063 0.02112288759
## 9 0.000003133578 0.0236440296 0.0239182465 0.02926844744
## 10 0.000001010799 0.0068144925 0.0111876298 0.16625492570
## 11 0.013548223477 0.0727349710 0.0053430816 0.00369480248
## 12 0.114022009446 0.0045365118 0.0840241591 0.02751170921
## 13 0.001475318923 0.0693330310 0.0002430839 0.00691757099
## 14 0.001816448780 0.0002935222 0.0003403305 0.00275196727
## 15 0.032553579892 0.0323042237 0.0388163673 0.02303035562
## 16 0.449886932765 0.4464624855 0.5401570507 0.29912272131
## 17 0.000125453596 0.0001756262 0.0002162684 0.00006844804
## 18 0.073011079184 0.0738605267 0.0874105499 0.05031598034
## factor(yob)1937 factor(yob)1938 factor(region)1 factor(region)3
## 1 0.002277904748160 0.002265648413 0.00312047248579 0.0047761626385
## 2 0.000000004039404 0.007247532678 0.03848566502510 0.0031739985498
## 3 0.001959760864289 0.003433759064 0.04889697744384 0.0165614253729
## 4 0.016596048606110 0.000096334459 0.10300016609316 0.0148912835287
## 5 0.029196820836313 0.034403651591 0.08433353122542 0.0859679147450
## 6 0.119121546142588 0.034933377239 0.01350332132857 0.0663338715363
## 7 0.039448429956198 0.020364180755 0.00221062906453 0.0002172563981
## 8 0.062009181181049 0.256213308776 0.00000002220023 0.0000007873309
## 9 0.122738080054407 0.046840479404 0.01266426126269 0.0724497664003
## 10 0.032757203833853 0.031996480194 0.11031347222247 0.0683488999534
## 11 0.017163218777671 0.000056338478 0.09568563128167 0.0222180514431
## 12 0.001921088244011 0.002513003534 0.03913031149624 0.0140042757802
## 13 0.000004627675753 0.007077545916 0.03857565877973 0.0031458948153
## 14 0.000004145848807 0.000003707323 0.00040591522557 0.0032581182334
## 15 0.033461636661918 0.031949147353 0.37887448039598 0.5787914115702
## 16 0.448066578376753 0.448101137338 0.01059931578283 0.0157641397507
## 17 0.000112678643689 0.000166190443 0.00001547107432 0.0000078998348
## 18 0.073161045509026 0.072338177043 0.02018469761186 0.0300888421182
## factor(region)5 factor(region)6 factor(region)7 factor(region)8
## 1 0.00188044593682 0.0047505835316 0.003158467602421 0.00406551179156
## 2 0.01759853674585 0.0045211010120 0.239715646156465 0.02105487016854
## 3 0.20366581521349 0.0577522520870 0.000329353802804 0.00977550920444
## 4 0.03755383916274 0.1213988311736 0.001485005761283 0.01048222489320
## 5 0.10191881557130 0.0008607367478 0.036365502177985 0.01169667461255
## 6 0.01255588760509 0.0007353469673 0.011411546415395 0.17642250690527
## 7 0.00004342565943 0.0000077287812 0.000026004177560 0.00115170666008
## 8 0.00000004219971 0.0000001258475 0.000000007318915 0.00000005436493
## 9 0.00838240559130 0.0008197200664 0.013264024109953 0.16911695573897
## 10 0.08414479275173 0.0023269787004 0.032326653528334 0.01848059259426
## 11 0.06092213234696 0.1090888944935 0.000283737659074 0.01087648373353
## 12 0.20494156236779 0.0703725295946 0.000003363732224 0.01086746477078
## 13 0.01908507883273 0.0046094217305 0.246823714628851 0.02202332362754
## 14 0.00106472365382 0.0006694028117 0.000000447818558 0.00006663079871
## 15 0.22815759622390 0.5762891788358 0.383467943951233 0.49270822014404
## 16 0.00580997868436 0.0155727000378 0.010285349364563 0.01465095333693
## 17 0.00000084480522 0.0000006013906 0.000021135351748 0.00000328683870
## 18 0.01227407664776 0.0302238661906 0.021032096442635 0.02655702981598
###### Normality assumption: Lilliefors (Kolmogorov-Smirnov) test on the
###### ols2 residuals
lillie.test(reols2)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: reols2
## D = 0.099501, p-value < 0.00000000000000022
# Q-Q plot of the ols2 residuals
ols_plot_resid_qq(ols2)

# With the original data set
# Model 3: the model 1 specification, fitted on data2 instead of the
# simulated sample.
ols3 <- lm(
  lw ~ black + smsa + married + yob + region + qob,
  data = data2
)
summary(ols3)
##
## Call:
## lm(formula = lw ~ black + smsa + married + yob + region + qob,
## data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.91033 -0.16396 -0.01509 0.14973 0.74810
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.09292 76.71720 0.679 0.509
## black -0.43452 0.44376 -0.979 0.345
## smsa -0.36533 0.49732 -0.735 0.476
## married -0.64428 0.47677 -1.351 0.200
## yob -0.02309 0.03962 -0.583 0.570
## region -0.03878 0.03375 -1.149 0.271
## qob -0.12406 0.09704 -1.278 0.223
##
## Residual standard error: 0.4288 on 13 degrees of freedom
## Multiple R-squared: 0.291, Adjusted R-squared: -0.03622
## F-statistic: 0.8893 on 6 and 13 DF, p-value: 0.53
###### Linearity
# Residuals-versus-fitted plot, then the mean of the residuals
plot(ols3, which = 1)
reols3 <- residuals(ols3)
mean(reols3)
## [1] 0.000000000000000006251239
###### Independence: Durbin-Watson test for autocorrelation in ols3
dwtest(ols3)
##
## Durbin-Watson test
##
## data: ols3
## DW = 2.8836, p-value = 0.9804
## alternative hypothesis: true autocorrelation is greater than 0
###### Homoscedasticity assumption: Breusch-Pagan test on ols3
###### (H0: the error variance is constant)
ols_test_breusch_pagan(ols3)
##
## Breusch Pagan Test for Heteroskedasticity
## -----------------------------------------
## Ho: the variance is constant
## Ha: the variance is not constant
##
## Data
## ------------------------------
## Response : lw
## Variables: fitted values of lw
##
## Test Summary
## ----------------------------
## DF = 1
## Chi2 = 0.2461636
## Prob > Chi2 = 0.6197895
###### Collinearity assumption: tolerance/VIF and condition indices for ols3
ols_coll_diag(ols3)
## Tolerance and Variance Inflation Factor
## ---------------------------------------
## Variables Tolerance VIF
## 1 black 0.9826738 1.017632
## 2 smsa 0.7824206 1.278085
## 3 married 0.8513163 1.174651
## 4 yob 0.8833963 1.131995
## 5 region 0.9551500 1.046956
## 6 qob 0.7825009 1.277954
##
##
## Eigenvalue and Condition Index
## ------------------------------
## Eigenvalue Condition Index intercept black smsa
## 1 5.510175419448 1.000000 0.0000000507351929 0.0020279668 0.001209275404
## 2 0.951925533965 2.405920 0.0000000013468709 0.9598666981 0.000005638924
## 3 0.297711818116 4.302141 0.0000001204973800 0.0181705455 0.006547887560
## 4 0.174208419517 5.624037 0.0000000900113684 0.0184751767 0.025414152852
## 5 0.050962618136 10.398168 0.0000000003857804 0.0002943934 0.358460169051
## 6 0.015015408892 19.156410 0.0000297999124711 0.0000607728 0.519963202679
## 7 0.000000781926 2654.604932 0.9999699371109362 0.0011044467 0.088399673530
## married yob region qob
## 1 0.00132903334 0.0000000508665905 0.007864183 0.0037680086
## 2 0.00001090064 0.0000000013462188 0.004391951 0.0008957469
## 3 0.00165056657 0.0000001208817100 0.922678502 0.0184855192
## 4 0.02630451152 0.0000000898872863 0.002043443 0.5832145243
## 5 0.44015417661 0.0000000008991584 0.037003443 0.0005006499
## 6 0.51460143797 0.0000300148110073 0.016344781 0.3928825587
## 7 0.01594937335 0.9999697213080286 0.009673699 0.0002529925
###### Normality assumption: Lilliefors (Kolmogorov-Smirnov) test on the
###### ols3 residuals
lillie.test(reols3)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: reols3
## D = 0.15, p-value = 0.2805
# Q-Q plot of the ols3 residuals
ols_plot_resid_qq(ols3)

# Model 4: the model 2 specification (yob and region as factors), fitted on
# data2.
ols4 <- lm(
  lw ~ black + smsa + married + factor(yob) + factor(region),
  data = data2
)
summary(ols4)
##
## Call:
## lm(formula = lw ~ black + smsa + married + factor(yob) + factor(region),
## data = data2)
##
## Residuals:
## 1 2 3
## -0.000000000000000136710 -0.292325027362902722938 0.391495005367982262801
## 4 5 6
## 0.000000000000000186653 0.000000000000000054814 0.179291009870552631167
## 7 8 9
## 0.292325027362902889472 -0.179291009870552742189 -0.000000000000000090902
## 10 11 12
## -0.000000000000000118658 -0.000000000000000049269 0.000000000000000075631
## 13 14 15
## 0.000000000000000020120 0.000000000000000006242 -0.391495005367982262801
## 16 17 18
## 0.000000000000000030528 -0.000000000000000011105 -0.000000000000000007636
## 19 20
## -0.000000000000000090902 0.000000000000000131142
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.6446 1.5900 2.921 0.0614 .
## black -1.1201 0.9966 -1.124 0.3429
## smsa 0.6719 0.9966 0.674 0.5485
## married -0.3380 0.6010 -0.562 0.6131
## factor(yob)1931 1.3231 0.7360 1.798 0.1701
## factor(yob)1932 0.8769 0.9502 0.923 0.4242
## factor(yob)1933 1.3301 0.7360 1.807 0.1685
## factor(yob)1934 1.4581 0.7950 1.834 0.1640
## factor(yob)1935 1.0204 0.5204 1.961 0.1448
## factor(yob)1936 0.9800 0.7950 1.233 0.3055
## factor(yob)1937 1.3696 0.9502 1.441 0.2451
## factor(yob)1938 0.3487 0.7950 0.439 0.6906
## factor(region)1 -0.3003 0.5204 -0.577 0.6043
## factor(region)3 0.3186 0.5204 0.612 0.5837
## factor(region)5 0.5473 0.6010 0.911 0.4296
## factor(region)6 0.3251 0.6010 0.541 0.6261
## factor(region)7 -0.2695 0.6010 -0.448 0.6843
## factor(region)8 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4249 on 3 degrees of freedom
## Multiple R-squared: 0.8393, Adjusted R-squared: -0.01785
## F-statistic: 0.9792 on 16 and 3 DF, p-value: 0.5907
###### Linearity
# Residuals-versus-fitted plot, then the mean of the residuals
plot(ols4, which = 1)
reols4 <- residuals(ols4)
mean(reols4)
## [1] 0.000000000000000002772935
###### Independence: Durbin-Watson test for autocorrelation in ols4
dwtest(ols4)
##
## Durbin-Watson test
##
## data: ols4
## DW = 2.4225, p-value < 0.00000000000000022
## alternative hypothesis: true autocorrelation is greater than 0
###### Homoscedasticity assumption: Breusch-Pagan test on ols4
###### (H0: the error variance is constant)
ols_test_breusch_pagan(ols4)
##
## Breusch Pagan Test for Heteroskedasticity
## -----------------------------------------
## Ho: the variance is constant
## Ha: the variance is not constant
##
## Data
## ------------------------------
## Response : lw
## Variables: fitted values of lw
##
## Test Summary
## -----------------------------
## DF = 1
## Chi2 = 0.03534657
## Prob > Chi2 = 0.8508713
###### Collinearity assumption (no collinearity diagnostic is run for ols4)
###### Normality assumption: Lilliefors (Kolmogorov-Smirnov) test on the
###### ols4 residuals
lillie.test(reols4)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: reols4
## D = 0.35, p-value = 0.000000566
# Q-Q plot of the ols4 residuals
ols_plot_resid_qq(ols4)
##### Interpretación

Para el modelo 1 se cumple el supuesto de linealidad. Con el test de
Durbin-Watson no se rechaza la hipótesis nula, lo que permite afirmar que
no hay autocorrelación serial en los residuos. Sin embargo, en el supuesto
de homocedasticidad se rechaza la hipótesis nula y se puede afirmar que hay
heterocedasticidad, aunque esta se puede solucionar por medio de una
estimación por mínimos cuadrados ponderados. Los residuos no siguen una
distribución normal según el test de Kolmogorov-Smirnov, porque se rechaza
la hipótesis nula (la muestra proviene de una distribución normal); en este
caso lo mejor es dar solución por medio de errores robustos. Para el
modelo 2 se cumple el supuesto de linealidad. Con el test de Durbin-Watson
no se rechaza la hipótesis nula, lo que permite afirmar que no hay
autocorrelación serial en los residuos. En el supuesto de homocedasticidad
no se rechaza la hipótesis nula y se puede afirmar que hay
homocedasticidad. Los residuos siguen una distribución normal según el test
de Kolmogorov-Smirnov, porque no se rechaza la hipótesis nula (la muestra
proviene de una distribución normal). Para el modelo 3 se cumple el
supuesto de linealidad. Con el test de Durbin-Watson no se rechaza la
hipótesis nula, lo que permite afirmar que no hay autocorrelación serial en
los residuos. En el supuesto de homocedasticidad no se rechaza la hipótesis
nula y se puede afirmar que hay homocedasticidad. Los residuos siguen una
distribución normal según el test de Kolmogorov-Smirnov, porque no se
rechaza la hipótesis nula (la muestra proviene de una distribución normal).
Para el modelo 4 se cumple el supuesto de linealidad. Con el test de
Durbin-Watson se rechaza la hipótesis nula, lo que permite afirmar que hay
autocorrelación serial en los residuos; lo mejor es dar solución por medio
de una estimación Prais-Winsten. Sin embargo, en el supuesto de
homocedasticidad no se rechaza la hipótesis nula y se puede afirmar que hay
homocedasticidad. Los residuos no siguen una distribución normal según el
test de Kolmogorov-Smirnov, porque se rechaza la hipótesis nula (la muestra
proviene de una distribución normal); en este caso lo mejor es dar solución
por medio de errores robustos. En general, en los modelos las variables no
fueron estadísticamente significativas (con las siguientes excepciones: en
el modelo 1, la variable smsa al 5 % de significancia; en el modelo 2, la
variable smsa y la región 3, ambas significativas al 5 %) y no se presentó
multicolinealidad, lo que se puede evidenciar por medio de los VIF.
# Model 1 (instrumental variables)
# edu appears only on the left of `|`, so it is the instrumented regressor;
# factor(qob) appears only on the right, so it is the excluded instrument.
# The remaining regressors are listed on both sides.
iv <- ivreg(
  lw ~ edu + black + smsa + married + yob + region + qob |
    factor(qob) + black + smsa + married + yob + region + qob,
  data = data
)
summary(iv)
##
## Call:
## ivreg(formula = lw ~ edu + black + smsa + married + yob + region +
## qob | factor(qob) + black + smsa + married + yob + region +
## qob, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.2254 -0.5293 0.0382 0.5788 3.4085
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.3701130 5.9164503 0.232 0.817
## edu 0.2760814 0.3129772 0.882 0.378
## black -0.0058203 0.0147124 -0.396 0.692
## smsa -0.0035758 0.0210539 -0.170 0.865
## married 0.0012818 0.0104799 0.122 0.903
## yob 0.0006297 0.0012804 0.492 0.623
## region -0.0004480 0.0007912 -0.566 0.571
## qob 0.0005395 0.0019917 0.271 0.786
##
## Residual standard error: 0.8649 on 152312 degrees of freedom
## Multiple R-Squared: -1.844, Adjusted R-squared: -1.844
## Wald test: 0.4903 on 7 and 152312 DF, p-value: 0.8424
# Instrument checks
# Correlation between the residuals of iv and qob
u <- residuals(iv)
cor(u, data$qob)
## [1] 0.000000000000002586146
# Correlation between the instrumented variable edu and qob
cor(data$edu, data$qob)
## [1] 0.0002492395
# Model 2 (instrumental variables)
# Same idea as the first IV model, with yob and region as factors and
# factor(qob * yob) as the term that appears only on the instrument side.
iv2 <- ivreg(
  lw ~ edu + black + smsa + married + factor(yob) + factor(region) |
    factor(qob * yob) + black + smsa + married + factor(yob) +
      factor(region),
  data = data
)
summary(iv2)
##
## Call:
## ivreg(formula = lw ~ edu + black + smsa + married + factor(yob) +
## factor(region) | factor(qob * yob) + black + smsa + married +
## factor(yob) + factor(region), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.72032 -0.21187 0.09573 0.33245 1.14849
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.1901027 0.3854538 16.059 <0.0000000000000002 ***
## edu -0.0170518 0.0313829 -0.543 0.5869
## black 0.0041257 0.0061476 0.671 0.5022
## smsa 0.0137290 0.0063561 2.160 0.0308 *
## married 0.0037073 0.0060509 0.613 0.5401
## factor(yob)1931 0.0054135 0.0051233 1.057 0.2907
## factor(yob)1932 0.0048622 0.0060082 0.809 0.4184
## factor(yob)1933 0.0067518 0.0058973 1.145 0.2523
## factor(yob)1934 0.0023682 0.0059419 0.399 0.6902
## factor(yob)1935 0.0026265 0.0054111 0.485 0.6274
## factor(yob)1936 0.0080008 0.0072315 1.106 0.2686
## factor(yob)1937 0.0053433 0.0059271 0.902 0.3673
## factor(yob)1938 -0.0021240 0.0061017 -0.348 0.7278
## factor(region)1 -0.0004684 0.0055498 -0.084 0.9327
## factor(region)3 -0.0089536 0.0044206 -2.025 0.0428 *
## factor(region)5 0.0027201 0.0067783 0.401 0.6882
## factor(region)6 -0.0057652 0.0041849 -1.378 0.1683
## factor(region)7 -0.0085816 0.0051384 -1.670 0.0949 .
## factor(region)8 -0.0047993 0.0046754 -1.026 0.3047
##
## Diagnostic tests:
## df1 df2 statistic p-value
## Weak instruments 3 152275 1.616 0.183
## Wu-Hausman 1 152300 0.296 0.586
## Sargan 2 NA 30.976 0.000000188 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5146 on 152301 degrees of freedom
## Multiple R-Squared: -0.006894, Adjusted R-squared: -0.007013
## Wald test: 0.9828 on 18 and 152301 DF, p-value: 0.4762
# Instrument checks for the second IV model: correlate the residuals of iv2
# with the qob * yob product used to build its instrument.
u2 <- resid(iv2)
# Uses u2 (residuals of iv2). The previous version passed `u`, the residuals
# of the first IV model, so u2 was never used and the check described the
# wrong model. NOTE: the recorded output below this call was produced with
# `u`; re-run the script to refresh it.
cor(u2, data$qob * data$yob)
## [1] 0.000003865908
# Correlation between the instrumented variable edu and the qob * yob product
cor(data$edu,data$qob*data$yob)
## [1] 0.0002395593
En la estimación 1 de variables instrumentales se puede afirmar que el instrumento qob es débil, puesto que su relación con la variable educación es muy cercana a 0. Para que sea un buen instrumento debe cumplir con los supuestos cov(z,u) = 0 y cov(x,z) != 0; sin embargo, solo cumple con no tener correlación con los residuos. En la estimación 2 de variables instrumentales, el instrumento (i.qob*i.yob) tampoco es un buen instrumento, ya que sufre el mismo problema que en la estimación 1: la correlación entre el instrumento y la variable instrumentada es casi 0, y cumple únicamente con el supuesto de que los errores no se correlacionan con el instrumento.
# Tobit
# Censored regression of log wage on the model 1 regressors, restricted to
# the observations with state == 36; the censoring limits are the function
# defaults because none are passed.
# NOTE(review): the result is stored under the name `tobit`, the same name as
# the fitting function; the model comparison step refers to it by that name.
tobit <- tobit(
  lw ~ black + smsa + married + yob + region + qob,
  data = data,
  subset = (state == 36)
)
summary(tobit)
##
## Call:
## tobit(formula = lw ~ black + smsa + married + yob + region +
## qob, subset = (state == 36), data = data)
##
## Observations:
## Total Left-censored Uncensored Right-censored
## 14881 2 14879 0
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.8485081 3.2160451 2.440 0.0147 *
## black 0.0301383 0.0203386 1.482 0.1384
## smsa 0.0108805 0.0199101 0.546 0.5847
## married -0.0121696 0.0202434 -0.601 0.5477
## yob -0.0009600 0.0016632 -0.577 0.5638
## region 0.0004162 0.0014720 0.283 0.7774
## qob -0.0029972 0.0038274 -0.783 0.4336
## Log(scale) -0.6506790 0.0057973 -112.238 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Scale: 0.5217
##
## Gaussian distribution
## Number of Newton-Raphson Iterations: 2
## Log-likelihood: -1.144e+04 on 8 Df
## Wald-statistic: 3.904 on 6 Df, p-value: 0.68968
# Model comparison
# One table with all seven fits and significance stars. The argument is
# spelled `title`; the earlier spelling `tittle` is not an argument of
# modelsummary(), so the caption was never applied.
modelsummary(
  list(
    "ols1" = ols1,
    "ols2" = ols2,
    "ols3" = ols3,
    "ols4" = ols4,
    "iv" = iv,
    "iv2" = iv2,
    "tobit" = tobit
  ),
  title = "Comparación modelos",
  stars = TRUE
)
| | ols1 | ols2 | ols3 | ols4 | iv | iv2 | tobit |
|---|---|---|---|---|---|---|---|
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | | | | | | | |
| (Intercept) | 6.378*** | 5.981*** | 52.093 | 4.645+ | 1.370 | 6.190*** | 7.849* |
| | (0.988) | (0.009) | (76.717) | (1.590) | (5.916) | (0.385) | (3.216) |
| black | 0.004 | 0.004 | -0.435 | -1.120 | -0.006 | 0.004 | 0.030 |
| | (0.006) | (0.006) | (0.444) | (0.997) | (0.015) | (0.006) | (0.020) |
| smsa | 0.013* | 0.013* | -0.365 | 0.672 | -0.004 | 0.014* | 0.011 |
| | (0.006) | (0.006) | (0.497) | (0.997) | (0.021) | (0.006) | (0.020) |
| married | 0.004 | 0.004 | -0.644 | -0.338 | 0.001 | 0.004 | -0.012 |
| | (0.006) | (0.006) | (0.477) | (0.601) | (0.010) | (0.006) | (0.020) |
| yob | 0.000 | | -0.023 | | 0.001 | | -0.001 |
| | (0.001) | | (0.040) | | (0.001) | | (0.002) |
| region | -0.001 | | -0.039 | | 0.000 | | 0.000 |
| | (0.000) | | (0.034) | | (0.001) | | (0.001) |
| qob | 0.001 | | -0.124 | | 0.001 | | -0.003 |
| | (0.001) | | (0.097) | | (0.002) | | (0.004) |
| factor(yob)1931 | | 0.005 | | 1.323 | | 0.005 | |
| | | (0.005) | | (0.736) | | (0.005) | |
| factor(yob)1932 | | 0.005 | | 0.877 | | 0.005 | |
| | | (0.006) | | (0.950) | | (0.006) | |
| factor(yob)1933 | | 0.007 | | 1.330 | | 0.007 | |
| | | (0.006) | | (0.736) | | (0.006) | |
| factor(yob)1934 | | 0.003 | | 1.458 | | 0.002 | |
| | | (0.006) | | (0.795) | | (0.006) | |
| factor(yob)1935 | | 0.003 | | 1.020 | | 0.003 | |
| | | (0.005) | | (0.520) | | (0.005) | |
| factor(yob)1936 | | 0.008 | | 0.980 | | 0.008 | |
| | | (0.007) | | (0.795) | | (0.007) | |
| factor(yob)1937 | | 0.006 | | 1.370 | | 0.005 | |
| | | (0.006) | | (0.950) | | (0.006) | |
| factor(yob)1938 | | -0.001 | | 0.349 | | -0.002 | |
| | | (0.006) | | (0.795) | | (0.006) | |
| factor(region)1 | | -0.002 | | -0.300 | | 0.000 | |
| | | (0.005) | | (0.520) | | (0.006) | |
| factor(region)3 | | -0.010* | | 0.319 | | -0.009* | |
| | | (0.004) | | (0.520) | | (0.004) | |
| factor(region)5 | | 0.002 | | 0.547 | | 0.003 | |
| | | (0.007) | | (0.601) | | (0.007) | |
| factor(region)6 | | -0.006 | | 0.325 | | -0.006 | |
| | | (0.004) | | (0.601) | | (0.004) | |
| factor(region)7 | | -0.008 | | -0.269 | | -0.009+ | |
| | | (0.005) | | (0.601) | | (0.005) | |
| factor(region)8 | | -0.005 | | | | -0.005 | |
| | | (0.004) | | | | (0.005) | |
| edu | | | | | 0.276 | -0.017 | |
| | | | | | (0.313) | (0.031) | |
| Num.Obs. | 152320 | 152320 | 20 | 20 | 152320 | 152320 | 14881 |
| R2 | 0.000 | 0.000 | 0.291 | 0.839 | -1.844 | -0.007 | |
| R2 Adj. | 0.000 | 0.000 | -0.036 | -0.018 | -1.844 | -0.007 | |
| AIC | 228835.1 | 228847.1 | 30.3 | 20.6 | 388055.8 | 229913.1 | 22889.4 |
| BIC | 228914.6 | 229035.9 | 38.2 | 38.5 | 388145.2 | 230111.7 | 22950.2 |
| Log.Lik. | -114409.545 | -114404.560 | -7.134 | 7.708 | | | |
| F | 1.258 | 1.030 | 0.889 | | | | |
| RMSE | 0.51 | 0.51 | 0.35 | 0.16 | 0.86 | 0.51 | 0.52 |
Respecto al modelo Tobit, se buscaba evaluar si, por el hecho de que Nueva York es la capital del mundo, se podía encontrar evidencia de discriminación según los datos de la muestra. No se pudo obtener evidencia suficiente para afirmar o rechazar esta hipótesis, ya que el coeficiente de black, aunque es positivo, no es estadísticamente significativo; por lo tanto, no hay evidencia estadística suficiente para sustentar la hipótesis inicial.
Para la comparación de los modelos me decantaría por la estimación ols2 de mínimos cuadrados ordinarios. Descarto completamente las estimaciones por IV, puesto que los test demuestran que no se están utilizando buenos instrumentos. El modelo Tobit también se descarta, puesto que los coeficientes no son estadísticamente significativos y debería estudiarse con mayor profundidad la muestra para recomendar una censura específica. Los modelos ols3 y ols4 son descartados inmediatamente por no tener significancia en ninguna de sus variables explicativas, además de recopilar únicamente 20 datos para el estudio, lo que a mi consideración no representa una muestra significativa para todos los efectos que se desea capturar de las variables explicativas. El modelo ols1 lo descarto, ya que la única variable explicativa significativa es smsa y sufre de varios problemas, como heterocedasticidad y una distribución no normal de los residuos. El modelo ols2, a pesar de que solo cuenta con la variable smsa significativa, incluye también los efectos categóricos de las variables yob y region, lo que permite estudiar de manera detallada la incidencia de cada región en el salario, destacando la región 3 con un coeficiente significativo al 5 %. Finalmente, este modelo cumple con los supuestos de OLS y presenta uno de los estadísticos AIC y BIC más bajos.