library(normtest)
library(car)
## Loading required package: carData
library(tseries)
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(nortest)
library(MASS)
rm(list=ls(all=T))
Datpg= read.csv("G:/UAAAN/MATERIAS/2019/REGRESION APLICADA/LECCION 8/Pin_gre.csv")
attach(Datpg)
head(Datpg)
## DN HT Btot D2H
## 1 3 3.9 3.6 35.2
## 2 3 3.6 6.0 32.4
## 3 4 5.3 4.9 84.8
## 4 4 3.9 4.9 61.6
## 5 4 5.1 4.3 81.6
## 6 4 5.3 6.1 84.8
#*Haciendo la figura de dispersion entre "D2H" y "Btot"*
plot(D2H, Btot, xlab="D^2^H", ylab="Biomasa total (kg)", pch=1)
#COMPROBAR LA LINEALIDAD
abline(lm(Btot~D2H), col="red")
#COMPROBAR LA LINEALIDAD con un smooth
scatter.smooth(D2H, Btot, main="", col="green", ylab="Biomasa total (kg)")
En la figura anterior se observa que la tendencia entre la varIable dependiente (Btot) e independiente (D2H) es lineal, en base a esto, procedemos a hacer un modelo de regresión lineal.
fit1<-lm(Btot~D2H)
summary(fit1)
##
## Call:
## lm(formula = Btot ~ D2H)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.079 -5.757 -0.457 5.056 45.197
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.065401 1.179620 2.599 0.00992 **
## D2H 0.037372 0.001178 31.736 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.28 on 248 degrees of freedom
## Multiple R-squared: 0.8024, Adjusted R-squared: 0.8016
## F-statistic: 1007 on 1 and 248 DF, p-value: < 2.2e-16
cov(D2H,Btot/var(D2H)) #Coefciente b
## [1] 0.03737189
mean(Btot)-cov(D2H,Btot)/var(D2H)*mean(D2H) #Coefciente a
## [1] 3.065401
cov(D2H,Btot)^2/(var(D2H)*var(Btot)) #R cuadrado
## [1] 0.8024172
#**Aqui comprobamos que los coeficientes de regresion calculados automaticamete mediante "lm" son igual que si los calculamos manualmente**
boxplot(fit1$residuals, col="green")
# Prueba de normalidad
#Opcion 1 (Muestras n>=50) Lilliefors - Kolmogorov
lillie.test(fit1$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: fit1$residuals
## D = 0.10355, p-value = 7.909e-07
#Opcion 2 (Muestras n<=50) Shapirol - Wilks
ST= shapiro.test(fit1$residuals)
print(ST)
##
## Shapiro-Wilk normality test
##
## data: fit1$residuals
## W = 0.94577, p-value = 5.231e-08
#Opcion 3 Anderson Darling
AD= ad.test(fit1$residuals)
AD
##
## Anderson-Darling normality test
##
## data: fit1$residuals
## A = 3.8171, p-value = 1.54e-09
#Grafico
qqPlot(fit1$residuals, col="green")
## [1] 236 221
Las diferentes pruebas de normalidad demuestran que los residuales no se distribuyen normal, por lo que no debe procederse con el modelo. Es recomendable hacer una trasnformacion
bm= boxcox(lm(Btot~D2H))
str(bm)
## List of 2
## $ x: num [1:100] -2 -1.96 -1.92 -1.88 -1.84 ...
## $ y: num [1:100] -1004 -990 -976 -962 -949 ...
valor_de_lambda<-bm$ lambdahat
valor_de_lambda
## NULL
lambda= bm$x
lik= bm$y
bcm=cbind(lambda,lik)
bcm[order(lik),]
## lambda lik
## [1,] -2.00000000 -1003.7012
## [2,] -1.95959596 -989.7467
## [3,] -1.91919192 -975.8998
## [4,] -1.87878788 -962.1626
## [5,] -1.83838384 -948.5375
## [6,] -1.79797980 -935.0266
## [7,] -1.75757576 -921.6323
## [8,] -1.71717172 -908.3569
## [9,] -1.67676768 -895.2026
## [10,] -1.63636364 -882.1718
## [11,] -1.59595960 -869.2670
## [12,] -1.55555556 -856.4905
## [13,] -1.51515152 -843.8447
## [14,] -1.47474747 -831.3320
## [15,] -1.43434343 -818.9549
## [16,] -1.39393939 -806.7158
## [17,] 2.00000000 -795.0011
## [18,] -1.35353535 -794.6173
## [19,] -1.31313131 -782.6616
## [20,] 1.95959596 -780.4703
## [21,] -1.27272727 -770.8514
## [22,] 1.91919192 -766.0520
## [23,] -1.23232323 -759.1890
## [24,] 1.87878788 -751.7548
## [25,] -1.19191919 -747.6770
## [26,] 1.83838384 -737.5872
## [27,] -1.15151515 -736.3178
## [28,] -1.11111111 -725.1139
## [29,] 1.79797980 -723.5573
## [30,] -1.07070707 -714.0678
## [31,] 1.75757576 -709.6739
## [32,] -1.03030303 -703.1820
## [33,] 1.71717172 -695.9477
## [34,] -0.98989899 -692.4589
## [35,] 1.67676768 -682.3902
## [36,] -0.94949495 -681.9012
## [37,] -0.90909091 -671.5112
## [38,] 1.63636364 -669.0143
## [39,] -0.86868687 -661.2916
## [40,] 1.59595960 -655.8341
## [41,] -0.82828283 -651.2450
## [42,] 1.55555556 -642.8645
## [43,] -0.78787879 -641.3741
## [44,] -0.74747475 -631.6815
## [45,] 1.51515152 -630.1227
## [46,] -0.70707071 -622.1702
## [47,] 1.47474747 -617.6268
## [48,] -0.66666667 -612.8430
## [49,] 1.43434343 -605.3966
## [50,] -0.62626263 -603.7031
## [51,] -0.58585859 -594.7536
## [52,] 1.39393939 -593.4536
## [53,] -0.54545455 -585.9981
## [54,] 1.35353535 -581.8202
## [55,] -0.50505051 -577.4404
## [56,] 1.31313131 -570.5211
## [57,] -0.46464646 -569.0843
## [58,] -0.42424242 -560.9344
## [59,] 1.27272727 -559.5817
## [60,] -0.38383838 -552.9953
## [61,] 1.23232323 -549.0285
## [62,] -0.34343434 -545.2724
## [63,] 1.19191919 -538.8893
## [64,] -0.30303030 -537.7716
## [65,] -0.26262626 -530.4990
## [66,] 1.15151515 -529.1918
## [67,] -0.22222222 -523.4622
## [68,] 1.11111111 -519.9644
## [69,] -0.18181818 -516.6689
## [70,] 1.07070707 -511.2349
## [71,] -0.14141414 -510.1279
## [72,] -0.10101010 -503.8494
## [73,] 1.03030303 -503.0301
## [74,] -0.06060606 -497.8439
## [75,] 0.98989899 -495.3756
## [76,] -0.02020202 -492.1238
## [77,] 0.94949495 -488.2952
## [78,] 0.02020202 -486.7023
## [79,] 0.90909091 -481.8090
## [80,] 0.06060606 -481.5941
## [81,] 0.10101010 -476.8154
## [82,] 0.86868687 -475.9358
## [83,] 0.14141414 -472.3832
## [84,] 0.82828283 -470.6887
## [85,] 0.18181818 -468.3169
## [86,] 0.78787879 -466.0781
## [87,] 0.22222222 -464.6362
## [88,] 0.74747475 -462.1103
## [89,] 0.26262626 -461.3626
## [90,] 0.70707071 -458.7855
## [91,] 0.30303030 -458.5189
## [92,] 0.34343434 -456.1280
## [93,] 0.66666667 -456.1021
## [94,] 0.38383838 -454.2140
## [95,] 0.62626263 -454.0519
## [96,] 0.42424242 -452.8012
## [97,] 0.58585859 -452.6237
## [98,] 0.46464646 -451.9133
## [99,] 0.54545455 -451.8037
## [100,] 0.50505051 -451.5734
lamvalue=bm$x[which(bm$y==max(bm$y))]
lamvalue
## [1] 0.5050505
fit2=lm(Btot^lamvalue~D2H)
summary(fit2)
##
## Call:
## lm(formula = Btot^lamvalue ~ D2H)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9780 -0.7411 -0.0158 0.6245 3.3393
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.9223003 0.1023138 28.56 <2e-16 ***
## D2H 0.0031520 0.0001021 30.86 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9781 on 248 degrees of freedom
## Multiple R-squared: 0.7934, Adjusted R-squared: 0.7926
## F-statistic: 952.3 on 1 and 248 DF, p-value: < 2.2e-16
#Opcion 1 (Muestras n>=50) Lilliefors - Kolmogorov
lillie.test(fit2$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: fit2$residuals
## D = 0.061916, p-value = 0.02141
#Opcion 2 Jarque - Bera
JB= jarque.bera.test(fit2$residuals)
JB
##
## Jarque Bera Test
##
## data: fit2$residuals
## X-squared = 3.8207, df = 2, p-value = 0.148
#Grafico de cuantiles normales
qqPlot(fit2$residuals, col.lines = "green")
## [1] 182 242
Las diferentes pruebas de normalidad demuestran que los residuales ya se distribuyen normal, por lo que ya es procedente continuar probando los demas supeustos de regresion. En este caso ya no se harán, ahora se explica el procedimiento ´para generar una serie de trasnformaciones, en las que ademas de la normalidad, se comprueba la homogeneidad de varianza
#Buscando mas transformaciones que cumplan con los supuetos de un modelo lineal
require(trafo)
## Loading required package: trafo
## Registered S3 method overwritten by 'pryr':
## method from
## print.bytes Rcpp
##
## Attaching package: 'trafo'
## The following object is masked from 'package:MASS':
##
## boxcox
linMod <- lm(Btot ~ D2H)
summary(linMod)
##
## Call:
## lm(formula = Btot ~ D2H)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.079 -5.757 -0.457 5.056 45.197
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.065401 1.179620 2.599 0.00992 **
## D2H 0.037372 0.001178 31.736 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.28 on 248 degrees of freedom
## Multiple R-squared: 0.8024, Adjusted R-squared: 0.8016
## F-statistic: 1007 on 1 and 248 DF, p-value: < 2.2e-16
library(trafo)
linMod <- lm(Btot ~ D2H)
summary(linMod)
##
## Call:
## lm(formula = Btot ~ D2H)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.079 -5.757 -0.457 5.056 45.197
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.065401 1.179620 2.599 0.00992 **
## D2H 0.037372 0.001178 31.736 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.28 on 248 degrees of freedom
## Multiple R-squared: 0.8024, Adjusted R-squared: 0.8016
## F-statistic: 1007 on 1 and 248 DF, p-value: < 2.2e-16
assumptions(linMod)
## The default lambdarange for the Log shift opt transformation is calculated dependent on the data range. The lower value is set to -2.1 and the upper value to 169.1
##
## The default lambdarange for the Square-root shift transformation is calculated dependent on the data range. The lower value is set to -2.1 and the upper value to 169.1
##
## Test normality assumption
## Skewness Kurtosis Shapiro_W Shapiro_p
## sqrtshift 0.1971 3.1389 0.9918 0.1809
## dual 0.2415 3.3619 0.9902 0.0892
## boxcox 0.2520 3.3581 0.9891 0.0576
## bickeldoksum 0.2520 3.3582 0.9891 0.0576
## gpower 0.2531 3.3572 0.9890 0.0547
## modulus 0.2582 3.3681 0.9886 0.0452
## yeojohnson 0.2582 3.3681 0.9886 0.0452
## neglog -0.2059 2.6220 0.9885 0.0431
## glog -0.2930 2.7289 0.9868 0.0210
## log -0.3049 2.7516 0.9866 0.0192
## logshiftopt 0.2944 3.3301 0.9854 0.0119
## manly 0.3348 3.7712 0.9843 0.0073
## untransformed 0.4305 5.8502 0.9458 0.0000
## reciprocal -2.0726 8.9934 0.8243 0.0000
##
## Test homoscedasticity assumption
## BreuschPagan_V BreuschPagan_p
## neglog 0.4983 0.4803
## glog 1.1782 0.2777
## log 1.2696 0.2598
## sqrtshift 3.8059 0.0511
## dual 6.1705 0.0130
## boxcox 6.3758 0.0116
## bickeldoksum 6.3765 0.0116
## gpower 6.3945 0.0114
## modulus 6.5904 0.0103
## yeojohnson 6.5904 0.0103
## logshiftopt 7.8838 0.0050
## reciprocal 9.7836 0.0018
## manly 12.9434 0.0003
## untransformed 43.6094 0.0000
##
## Test linearity assumption
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
Los resultados son ordenados por el valor más alto según la prueba de Shapiro-Wilk y Breusch-Pagan
linMod_trafo <- trafo_lm(linMod)
linMod_trafo
## Untransformed model
##
## Call:
## lm(formula = Btot ~ D2H)
##
## Coefficients:
## (Intercept) D2H
## 3.06540 0.03737
##
## Transformed model: boxcox transformation
##
## Call: lm(formula = formula, data = data)
## formula = Btott ~ D2H
##
## Coefficients:
## (Intercept) D2H
## 3.818146 0.006332
diagnostics(linMod_trafo)
## Diagnostics: Untransformed vs transformed model
##
## Transformation: boxcox
## Estimation method: ml
## Optimal Parameter: 0.5091693
##
## Residual diagnostics:
##
## Normality:
## Pearson residuals:
## Skewness Kurtosis Shapiro_W Shapiro_p
## Untransformed model 0.4304925 5.850186 0.9457719 5.231264e-08
## Transformed model 0.2519717 3.358104 0.9891488 5.760956e-02
##
## Heteroscedasticity:
## BreuschPagan_V BreuschPagan_p
## Untransformed model 43.609435 4.009015e-11
## Transformed model 6.375832 1.156848e-02
plot(linMod_trafo)
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
Solo deberá cambiar el metodo de trasnformacion en trafo = “”
linMod_trafo2 <- trafo_lm(object = linMod, trafo = "logshiftopt", method = "skew")
linMod_trafo2
## Untransformed model
##
## Call:
## lm(formula = Btot ~ D2H)
##
## Coefficients:
## (Intercept) D2H
## 3.06540 0.03737
##
## Transformed model: logshiftopt transformation
##
## Call: lm(formula = formula, data = data)
## formula = Btott ~ D2H
##
## Coefficients:
## (Intercept) D2H
## 5.0087518 0.0001965
diagnostics(linMod_trafo2)
## Diagnostics: Untransformed vs transformed model
##
## Transformation: logshiftopt
## Estimation method: skew
## Optimal Parameter: 143.8589
##
## Residual diagnostics:
##
## Normality:
## Pearson residuals:
## Skewness Kurtosis Shapiro_W Shapiro_p
## Untransformed model 0.4304925 5.850186 0.9457719 5.231264e-08
## Transformed model 0.3154972 4.705136 0.9736489 1.366299e-04
##
## Heteroscedasticity:
## BreuschPagan_V BreuschPagan_p
## Untransformed model 43.60944 4.009015e-11
## Transformed model 21.11415 4.327211e-06
plot(linMod_trafo2)
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
Como puede observar, se mejoran los supuestos del modelo transformado al NO transformado. Después de esto prosigue a probar la autocorrelacion