setwd("G:/UAAAN/MATERIAS/2019/REGRESION APLICADA/LECCION 9")
rm(list=ls(all=TRUE))
El test consiste en verificar que la varianza de ?? no es función monotona creciente o decreciente de las x, en cuyo caso no habría heteroscedasticidad. Ordenar las observaciones de la serie en orden creciente, tomando como referencia los datos de la variable exógena xi que supuestamente produce el problema de heteroscedasticidad. Para ello se elimina 1/5 parte de los datos centrales, A continuación, se toma dos grupos en los extremos estimándose para cada uno su respectiva varianza. A las observaciones restantes se les ajusta un modelo separadamete y se determina el valor de F = SCE mayor / SCE
dat=read.csv("Intercepcion.csv")
attach(dat);dat
## X Y est est2
## 1 1.0 0.1 0.21242 0.155056
## 2 3.0 0.2 0.63726 0.465168
## 3 3.0 0.5 0.63726 0.465168
## 4 5.0 0.5 1.06210 0.775280
## 5 5.5 2.8 1.16831 0.852808
## 6 11.0 3.3 2.33662 1.705616
## 7 13.0 1.5 2.76146 2.015728
## 8 14.0 1.1 2.97388 2.170784
## 9 18.0 1.5 3.82356 2.791008
## 10 19.0 2.1 4.03598 2.946064
## 11 19.0 4.1 4.03598 2.946064
## 12 20.0 1.9 4.24840 3.101120
## 13 23.0 8.4 4.88566 3.566288
## 14 26.0 6.2 5.52292 4.031456
## 15 28.0 7.6 5.94776 4.341568
## 16 32.0 5.1 6.79744 4.961792
## 17 34.0 3.3 7.22228 5.271904
## 18 40.0 5.5 8.49680 6.202240
## 19 40.0 13.3 8.49680 6.202240
## 20 40.0 9.9 8.49680 6.202240
## 21 50.0 5.7 10.62100 7.752800
## 22 50.0 10.2 10.62100 7.752800
## 23 51.0 15.7 10.83342 7.907856
## 24 61.0 10.7 12.95762 9.458416
## 25 65.0 18.7 13.80730 10.078640
## 26 68.0 10.1 14.44456 10.543808
## 27 70.0 13.5 14.86940 10.853920
## 28 75.0 24.6 15.93150 11.629200
## 29 77.0 20.4 16.35634 11.939312
## 30 80.0 11.2 16.99360 12.404480
# Grafica
plot(X, Y,ylab="Intercepcion de lluvia", xlab="Precipitacion total", main="Diagrama de dispersiæ¼ã¸³n" )
# Estimaciæ¼ã¸³n de paræ¼ã¸±metros y prueba de hipæ¼ã¸³tesis
fit<-lm(Y~X-1); summary(fit)
##
## Call:
## lm(formula = Y ~ X - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.0921 -2.2948 -0.5146 1.4743 8.3887
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## X 0.21615 0.01416 15.27 2.13e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.29 on 29 degrees of freedom
## Multiple R-squared: 0.8894, Adjusted R-squared: 0.8855
## F-statistic: 233.1 on 1 and 29 DF, p-value: 2.128e-15
abline(fit,col="red")
# Graficas y estadæ¼ã¹¤sticos de diagnæ¼ã¸³stico
oldpar <- par(oma=c(0,0,3,0), mfrow=c(1,2))
plot(fit)
par(oldpar)
#Prueba de normalidad
shapiro.test(fit$residuals)
##
## Shapiro-Wilk normality test
##
## data: fit$residuals
## W = 0.96872, p-value = 0.5048
#Prueba Breusch Pagan Test Homosedasticidad
#library(lmtest); bptest(fit)
# Estadæ¼ã¹¤sticos de diagnæ¼ã¸³stico
inflm.SR=influence.measures(fit)
summary(inflm.SR)
## Potentially influential observations of
## lm(formula = Y ~ X - 1) :
##
## dfb.X dffit cov.r cook.d hat
## 27 -0.16 -0.16 1.13_* 0.03 0.09
## 28 1.04_* 1.04_* 0.87_* 0.84_* 0.10_*
## 29 0.43 0.43 1.10_* 0.18 0.11_*
## 30 -0.76 -0.76_* 1.02 0.52_* 0.12_*
#Figura de residuales
plot(fit$fitted.values, fit$residuals, ylab="Residuales", xlab="Estimados", abline(h=0, col="red"))
# Grupo 1
Y1=Y[1:12]
X1=X[1:12]
fit1<-lm(Y1~X1-1); summary(fit1)
##
## Call:
## lm(formula = Y1 ~ X1 - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9592 -0.5751 -0.1965 0.4436 2.0486
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## X1 0.13662 0.02368 5.77 0.000125 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.061 on 11 degrees of freedom
## Multiple R-squared: 0.7517, Adjusted R-squared: 0.7291
## F-statistic: 33.3 on 1 and 11 DF, p-value: 0.0001246
anova(fit1)
## Analysis of Variance Table
##
## Response: Y1
## Df Sum Sq Mean Sq F value Pr(>F)
## X1 1 37.448 37.448 33.297 0.0001246 ***
## Residuals 11 12.372 1.125
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Grupo 2
Y2=Y[19:30]
X2=X[19:30]
fit2<-lm(Y2~X2-1); summary(fit2)
##
## Call:
## lm(formula = Y2 ~ X2 - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.6873 -3.4803 -0.0116 4.1991 7.8307
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## X2 0.22359 0.02182 10.24 5.8e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.692 on 11 degrees of freedom
## Multiple R-squared: 0.9051, Adjusted R-squared: 0.8965
## F-statistic: 105 on 1 and 11 DF, p-value: 5.802e-07
anova(fit2)
## Analysis of Variance Table
##
## Response: Y2
## Df Sum Sq Mean Sq F value Pr(>F)
## X2 1 2310.93 2310.93 104.96 5.802e-07 ***
## Residuals 11 242.19 22.02
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#calculo de F (F calculada)
Fc=anova(fit2)[2,2]/anova(fit1)[2,2]; Fc #Si SCR del G2 es mayor que la SCE del G1
## [1] 19.57657
(length(X2)-2)
## [1] 10
#La F tabulada
#http://www.uaaan.mx/~jmelbos/tablas/distf.pdf
Ft=qf(1-0.05, (length(X2)-2),(length(X2)-2)); Ft
## [1] 2.978237
Ft=qf(1-0.05, 10,10); Ft
## [1] 2.978237
A continuación se realiza la hipótesis:
Ho: No Existe heteroscedasticidad
H1: Existe heteroscedasticidad
Si el Fc es mayor que el de tabla se rechaza la Ho de homoscedasticidad. El F(10,10)de tablas a un nivel del 5 % de significancia es 5.05 , como el F de la tabla es menor al F calculado, entonces se rechaza la Ho. Se concluye que el modelo presenta problemas de #Heterosedasticidad
La aceptacion o rechazo:
Si el Fc es mayor que el de tabla se rechaza la Ho de homoscedasticidad**
if(Fc > Ft) {
"Presencia de Heterosedasticidad"
} else {
"Presemcia de Homosedasticidad"
}
## [1] "Presencia de Heterosedasticidad"
fit.wls = lm(Y~X-1, weights=1/X); summary(fit.wls)
##
## Call:
## lm(formula = Y ~ X - 1, weights = 1/X)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -0.6855 -0.4578 -0.1311 0.3139 1.0137
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## X 0.21095 0.01549 13.62 3.94e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4999 on 29 degrees of freedom
## Multiple R-squared: 0.8648, Adjusted R-squared: 0.8601
## F-statistic: 185.5 on 1 and 29 DF, p-value: 3.944e-14
#prueba de normalidad despues de la ponderacion
shapiro.test(fit.wls$residuals)
##
## Shapiro-Wilk normality test
##
## data: fit.wls$residuals
## W = 0.96033, p-value = 0.3158
#Figura de residuales
plot(fit.wls$fitted.values, fit.wls$residuals, ylab="Residuales", xlab="Estimados", abline(h=0, col="red"))
A continuacón se hace nuevamente la prueba de Goldfeld, es decir la regresión particionada para obtener la SCE de ambas regesiones y calcular la F, y compararla con F de tablas
# Prueba de Goldfeld - Quandt: Grupo 1
Y1=Y[1:12]
X1=X[1:12]
fitw1<-lm(Y1~X1-1, weights=1/X1); summary(fitw1)
##
## Call:
## lm(formula = Y1 ~ X1 - 1, weights = 1/X1)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -0.27881 -0.18637 -0.11553 0.09561 0.84437
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## X1 0.14905 0.03035 4.911 0.000464 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3481 on 11 degrees of freedom
## Multiple R-squared: 0.6867, Adjusted R-squared: 0.6583
## F-statistic: 24.11 on 1 and 11 DF, p-value: 0.0004637
anova(fitw1)
## Analysis of Variance Table
##
## Response: Y1
## Df Sum Sq Mean Sq F value Pr(>F)
## X1 1 2.9214 2.92137 24.115 0.0004637 ***
## Residuals 11 1.3326 0.12115
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Prueba de Goldfeld - Quandt: Grupo 2
Y2=Y[19:30]
X2=X[19:30]
fitw2<-lm(Y2~X2-1,weights=1/X2); summary(fitw2)
##
## Call:
## lm(formula = Y2 ~ X2 - 1, weights = 1/X2)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -0.78902 -0.45276 -0.00701 0.52241 0.88694
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## X2 0.22558 0.02192 10.29 5.54e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.591 on 11 degrees of freedom
## Multiple R-squared: 0.9059, Adjusted R-squared: 0.8974
## F-statistic: 105.9 on 1 and 11 DF, p-value: 5.544e-07
anova(fitw2)
## Analysis of Variance Table
##
## Response: Y2
## Df Sum Sq Mean Sq F value Pr(>F)
## X2 1 36.996 36.996 105.91 5.544e-07 ***
## Residuals 11 3.842 0.349
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#la aceptacion o rechazo:
#Si el Fc es mayor que el de tabla se rechaza la Ho de homoscedasticidad
#calculo de F (F calculada)
Fc=anova(fitw2)[2,2]/anova(fitw1)[2,2]; Fc
## [1] 2.883303
#F tabulada
Ft=qf(1-0.05, 10,10); Ft
## [1] 2.978237
if(Fc > Ft) {
"Presencia de Heterosedasticidad"
} else {
"Presencia de Homosedasticidad"
}
## [1] "Presencia de Homosedasticidad"
#Box-Pierce and Ljung-Box Tests
#La prueba de Ljung-Box se puede definir de la siguiente manera:
#H0: Los errores se distribuyen de forma independiente
#H1: Los errores no se distribuyen de forma independiente.
e=fit.wls$residuals
Box.test (e, lag = 1, type = "Ljung")
##
## Box-Ljung test
##
## data: e
## X-squared = 0.28245, df = 1, p-value = 0.5951
# Prueba de normalidad
shapiro.test(e)
##
## Shapiro-Wilk normality test
##
## data: e
## W = 0.96033, p-value = 0.3158
qqnorm(fit.wls$residuals)
qqline(fit.wls$residuals)
# Estadæ¼ã¹¤sticos de diagnæ¼ã¸³stico
inflm.SR=influence.measures(fit.wls)
summary(inflm.SR)
## Potentially influential observations of
## lm(formula = Y ~ X - 1, weights = 1/X) :
##
## dfb.X dffit cov.r cook.d hat
## 27 -0.08 -0.08 1.11_* 0.01 0.07
## 28 0.63 0.63_* 0.95 0.34 0.07
Haciendo regresion con los residuales del modelo para utilizarlos como factor de ponderación
fitw3 <- lm(Y ~ X-1); summary(fitw3)
##
## Call:
## lm(formula = Y ~ X - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.0921 -2.2948 -0.5146 1.4743 8.3887
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## X 0.21615 0.01416 15.27 2.13e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.29 on 29 degrees of freedom
## Multiple R-squared: 0.8894, Adjusted R-squared: 0.8855
## F-statistic: 233.1 on 1 and 29 DF, p-value: 2.128e-15
var.func <- lm(fitw3$residuals^2 ~ X); summary(var.func)
##
## Call:
## lm(formula = fitw3$residuals^2 ~ X)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.670 -5.143 0.489 3.411 44.077
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.17961 3.65529 -0.870 0.392
## X 0.39297 0.08616 4.561 9.2e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.51 on 28 degrees of freedom
## Multiple R-squared: 0.4263, Adjusted R-squared: 0.4058
## F-statistic: 20.8 on 1 and 28 DF, p-value: 9.197e-05
lmlog <- lm(log(fitw3$residuals^2) ~ log(X)); summary(lmlog)
##
## Call:
## lm(formula = log(fitw3$residuals^2) ~ log(X))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.4423 -0.2042 0.6505 1.2525 2.4144
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.1290 1.3117 -3.148 0.003885 **
## log(X) 1.5653 0.3945 3.968 0.000458 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.357 on 28 degrees of freedom
## Multiple R-squared: 0.3599, Adjusted R-squared: 0.337
## F-statistic: 15.74 on 1 and 28 DF, p-value: 0.0004582
explmlog <- exp(lmlog$fitted.values)
fitw4 <- lm(Y ~ X-1, weights = 1/sqrt(explmlog)); summary(fitw4)
##
## Call:
## lm(formula = Y ~ X - 1, weights = 1/sqrt(explmlog))
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -2.9888 -1.8340 -0.5224 1.2088 4.4922
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## X 0.21242 0.01506 14.11 1.62e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.074 on 29 degrees of freedom
## Multiple R-squared: 0.8728, Adjusted R-squared: 0.8684
## F-statistic: 199 on 1 and 29 DF, p-value: 1.623e-14
**** Regresion GLM con los residuales del modelo para utilizarlos como factor de ponderación**
# 3. GLS factible
u2=log(fitw3$residuals^2)
mg=glm(u2 ~ X); summary(mg)
##
## Call:
## glm(formula = u2 ~ X)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -9.7148 -0.6938 0.5570 1.5973 2.4483
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.49860 0.77268 -1.939 0.06258 .
## X 0.06585 0.01821 3.616 0.00117 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 5.918195)
##
## Null deviance: 243.08 on 29 degrees of freedom
## Residual deviance: 165.71 on 28 degrees of freedom
## AIC: 142.41
##
## Number of Fisher Scoring iterations: 2
u2est=log(mg$residuals^2)
fitw4<- glm((Y) ~ X-1, weights=1/exp(u2est),family=gaussian); summary(fitw4)
##
## Call:
## glm(formula = (Y) ~ X - 1, family = gaussian, weights = 1/exp(u2est))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.7520 -0.6133 0.1774 2.4456 29.5047
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## X 0.155056 0.006367 24.36 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 50.48121)
##
## Null deviance: 31406 on 30 degrees of freedom
## Residual deviance: 1464 on 29 degrees of freedom
## AIC: 207.04
##
## Number of Fisher Scoring iterations: 2
Nota estos datos son sintéticos, solo para ejemplificar la Prueba de Goldfeld - Quandt