Muestra1 <- read.csv("Muestra1.csv")
attach(Muestra1[,2:5])
head(Muestra1[,2:5])
## D H V B
## 1 4.392676 19.5072 0.7050883 408.9512
## 2 3.342254 21.9456 0.4643955 269.3494
## 3 4.360845 21.6408 0.7277418 422.0902
## 4 2.641972 21.3360 0.2916630 169.1646
## 5 5.570423 24.9936 1.5772458 914.8025
## 6 3.533240 24.3840 0.6399597 371.1766
### variables
D2<-D*D
H2<-H*H
DH2<-D*H2
D2H<-D2*H
D3<-D*D*D
## GENERAR VARIABLES Y CORRELACION
Set1<-data.frame(B, D, H, D2, H2, DH2, D2H,D3 ); Set1
## B D H D2 H2 DH2 D2H D3
## 1 408.9512 4.392676 19.5072 19.295606 380.5309 1671.549 376.4032 84.75935
## 2 269.3494 3.342254 21.9456 11.170660 481.6094 1609.661 245.1468 37.33518
## 3 422.0902 4.360845 21.6408 19.016973 468.3242 2042.290 411.5425 82.93008
## 4 169.1646 2.641972 21.3360 6.980016 455.2249 1202.691 148.9256 18.44101
## 5 914.8025 5.570423 24.9936 31.029612 624.6800 3479.732 775.5417 172.84807
## 6 371.1766 3.533240 24.3840 12.483783 594.5795 2100.792 304.4046 44.10820
## 7 909.8754 5.506761 24.6888 30.324417 609.5368 3356.574 748.6735 166.98932
## 8 837.6109 5.729578 24.3840 32.828063 594.5795 3406.689 800.4795 188.09095
## 9 298.9121 3.501409 22.8600 12.259863 522.5796 1829.765 280.2605 42.92679
## 10 169.1646 2.737465 19.8120 7.493715 392.5153 1074.497 148.4655 20.51378
## 11 566.6192 4.456338 23.7744 19.858952 565.2221 2518.821 472.1347 88.49821
## 12 520.6327 4.520000 24.3840 20.430403 594.5795 2687.499 498.1750 92.34543
## 13 629.0294 5.092958 21.9456 25.938223 481.6094 2452.816 569.2299 132.10229
## 14 397.4546 3.596902 24.0792 12.937702 579.8079 2085.512 311.5295 46.53564
## 15 555.1225 4.106198 25.9080 16.860858 671.2245 2756.180 436.8311 69.23401
## 16 957.5043 5.697747 24.3840 32.464320 594.5795 3387.763 791.6100 184.97348
## 17 845.8228 5.729578 24.3840 32.828063 594.5795 3406.689 800.4795 188.09095
## 18 308.7664 3.405916 24.6888 11.600262 609.5368 2076.031 286.3966 39.50952
## 19 450.0106 4.233521 26.2128 17.922704 687.1109 2908.899 469.8043 75.87615
## 20 344.8986 3.628733 23.1648 13.167701 536.6080 1947.207 305.0272 47.78207
## 21 256.2104 3.501409 20.1168 12.259863 404.6856 1416.970 246.6292 42.92679
## 22 167.5222 2.801127 19.2024 7.846312 368.7322 1032.866 150.6680 21.97852
## 23 323.5478 3.437747 25.2984 11.818103 640.0090 2200.189 298.9791 40.62764
## 24 699.6515 5.188451 23.4696 26.920025 550.8221 2857.914 631.8022 139.67324
## 25 596.1819 4.615493 22.5552 21.302779 508.7370 2348.072 480.4884 98.32283
Corr<-cor(Set1, method = "pearson") # Hacer la correlacion
round(Corr, digits = 3)
## B D H D2 H2 DH2 D2H D3
## B 1.000 0.968 0.530 0.975 0.523 0.939 0.986 0.972
## D 0.968 1.000 0.441 0.995 0.433 0.907 0.985 0.981
## H 0.530 0.441 1.000 0.417 0.999 0.766 0.524 0.396
## D2 0.975 0.995 0.417 1.000 0.409 0.900 0.991 0.996
## H2 0.523 0.433 0.999 0.409 1.000 0.762 0.518 0.389
## DH2 0.939 0.907 0.766 0.900 0.762 1.000 0.947 0.888
## D2H 0.986 0.985 0.524 0.991 0.518 0.947 1.000 0.988
## D3 0.972 0.981 0.396 0.996 0.389 0.888 0.988 1.000
Rplot1.jpeg
Biomasa esta altamente correlacionada con D2H con el 0.99 ,y una baja correlacion con H2(0.52), en cuanto a las demas varibles se tiene una adecuada correlacion , pareciera que biomasa cumplira el supuesto de normalidad.
#correlacion set1
la figura lo confirma , la mayoria de las variables estan aparentemente correlacionadas excepto H2 que es la que esta mas lejana a biomasa y su linea no es tan verde .
regvacia1<-lm(formula = B~1, Set1); summary(regvacia1) # B, es la variable dependiente (Biomasa)
##
## Call:
## lm(formula = B ~ 1, data = Set1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -328.08 -186.84 -73.51 133.43 461.90
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 495.60 49.57 9.998 4.94e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 247.9 on 24 degrees of freedom
la intercepcion tiene un valor de 495.60 y un Pr(>|t|) significativo.
regcompleta1<-lm(formula = B~.,Set1);
summary(regcompleta1)
##
## Call:
## lm(formula = B ~ ., data = Set1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -69.449 -18.025 3.301 18.364 72.382
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1212.107 2511.006 0.483 0.635
## D 205.025 1014.919 0.202 0.842
## H -213.533 368.976 -0.579 0.570
## D2 -61.605 248.735 -0.248 0.807
## H2 10.354 14.795 0.700 0.493
## DH2 -2.941 3.437 -0.856 0.404
## D2H 18.439 19.341 0.953 0.354
## D3 -27.438 27.724 -0.990 0.336
##
## Residual standard error: 46.11 on 17 degrees of freedom
## Multiple R-squared: 0.9755, Adjusted R-squared: 0.9654
## F-statistic: 96.63 on 7 and 17 DF, p-value: 1.946e-12
el modelo no tiene variables ni intercepcion con un Pr(>|t|) signicativo ,por ahora. aunque si tiene una Adjusted R-squared: 0.9654 aceptables , no quiere decir que sea estadisticamente bueno .
regforw1<-step(regvacia1, scope = list(lower=regvacia1, upper=regcompleta1),direction = "forward"); summary(regforw1)
## Start: AIC=276.62
## B ~ 1
##
## Df Sum of Sq RSS AIC
## + D2H 1 1434340 40030 188.46
## + D2 1 1400143 74227 203.90
## + D3 1 1393131 81239 206.16
## + D 1 1381912 92458 209.39
## + DH2 1 1299357 175013 225.34
## + H 1 414442 1059928 270.37
## + H2 1 402873 1071497 270.64
## <none> 1474370 276.62
##
## Step: AIC=188.46
## B ~ D2H
##
## Df Sum of Sq RSS AIC
## <none> 40030 188.46
## + D2 1 585.24 39445 190.09
## + D 1 425.25 39605 190.20
## + D3 1 409.01 39621 190.21
## + H 1 337.67 39692 190.25
## + H2 1 297.45 39732 190.28
## + DH2 1 293.79 39736 190.28
##
## Call:
## lm(formula = B ~ D2H, data = Set1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -79.981 -15.381 -4.984 33.975 62.654
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.64135 19.32001 -0.24 0.812
## D2H 1.13799 0.03964 28.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 41.72 on 23 degrees of freedom
## Multiple R-squared: 0.9728, Adjusted R-squared: 0.9717
## F-statistic: 824.1 on 1 and 23 DF, p-value: < 2.2e-16
regstep1<-step(regvacia1,scope =list(lower=regvacia1, upper=regcompleta1),direction = "both"); summary(regstep1)
## Start: AIC=276.62
## B ~ 1
##
## Df Sum of Sq RSS AIC
## + D2H 1 1434340 40030 188.46
## + D2 1 1400143 74227 203.90
## + D3 1 1393131 81239 206.16
## + D 1 1381912 92458 209.39
## + DH2 1 1299357 175013 225.34
## + H 1 414442 1059928 270.37
## + H2 1 402873 1071497 270.64
## <none> 1474370 276.62
##
## Step: AIC=188.46
## B ~ D2H
##
## Df Sum of Sq RSS AIC
## <none> 40030 188.46
## + D2 1 585 39445 190.09
## + D 1 425 39605 190.20
## + D3 1 409 39621 190.21
## + H 1 338 39692 190.25
## + H2 1 297 39732 190.28
## + DH2 1 294 39736 190.28
## - D2H 1 1434340 1474370 276.62
##
## Call:
## lm(formula = B ~ D2H, data = Set1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -79.981 -15.381 -4.984 33.975 62.654
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.64135 19.32001 -0.24 0.812
## D2H 1.13799 0.03964 28.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 41.72 on 23 degrees of freedom
## Multiple R-squared: 0.9728, Adjusted R-squared: 0.9717
## F-statistic: 824.1 on 1 and 23 DF, p-value: < 2.2e-16
library(pacman)
p_load(bootStepAIC,MASS )
lmFit <-lm(formula = B~.,Set1)
boot.stepAIC(regstep1, Set1)
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## lm(formula = B ~ D2H, data = Set1)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 2 * df
##
## Covariates selected
## (%)
## D2H 100
##
## Coefficients Sign
## + (%) - (%)
## D2H 100 0
##
## Stat Significance
## (%)
## D2H 100
##
##
## The stepAIC() for the original data-set gave
##
## Call:
## lm(formula = B ~ D2H, data = Set1)
##
## Coefficients:
## (Intercept) D2H
## -4.641 1.138
##
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## B ~ D2H
##
## Final Model:
## B ~ D2H
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 23 40029.75 188.4626
boot.stepAIC(regstep1, Set1, B = 100, alpha = 0.05, direction = "forward", # Hacer 100 remuestreos
k = 2, verbose = F) # Usar el procedimiento "forward"
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## lm(formula = B ~ D2H, data = Set1)
##
## Bootstrap samples: 100
## Direction: forward
## Penalty: 2 * df
##
## Covariates selected
## (%)
## D2H 100
##
## Coefficients Sign
## + (%) - (%)
## D2H 100 0
##
## Stat Significance
## (%)
## D2H 100
##
##
## The stepAIC() for the original data-set gave
##
## Call:
## lm(formula = B ~ D2H, data = Set1)
##
## Coefficients:
## (Intercept) D2H
## -4.641 1.138
##
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## B ~ D2H
##
## Final Model:
## B ~ D2H
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 23 40029.75 188.4626
Ambos modelos (Forward Y Stepwis ) nos arroja una formula lm(formula = B ~ D2H, data = Set1), o B=B0 +B1D2H. SI remplazamos los valores tendremos que es B= -4.64135 +1.13799D2H ,tiene un error de 41.72 y una Adjusted R-squared: 0.9717 osea D2H explica el 97.17% de la biomasa .
si queremos mas exactitud o mas valides se aplica el modelo de con Boostrap forward notamos que efectivamente el modelo es el adecuado nos da unos valos iguales a los obtenidos en los anteriores modelos .
De primera veremos si el modelo que se obtuvo , es adecuado o si cumple las condiciones de regresion .en caso de que no lo haga se ajusta el modelo .
MODELO1.1<-lm(B ~ D2H, data = Muestra1) # Ajustar el moddelo
summary(MODELO1.1)
##
## Call:
## lm(formula = B ~ D2H, data = Muestra1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -79.981 -15.381 -4.984 33.975 62.654
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.64135 19.32001 -0.24 0.812
## D2H 1.13799 0.03964 28.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 41.72 on 23 degrees of freedom
## Multiple R-squared: 0.9728, Adjusted R-squared: 0.9717
## F-statistic: 824.1 on 1 and 23 DF, p-value: < 2.2e-16
observamos que la intercepcion no es significativa , lo tenemos que corregir.A continuacion se muestra la correccion .
MODELO1<-lm(B ~ D2H-1, data = Muestra1)
summary(MODELO1)
##
## Call:
## lm(formula = B ~ D2H - 1, data = Muestra1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -80.59 -17.61 -7.52 33.39 64.32
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## D2H 1.12940 0.01678 67.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40.89 on 24 degrees of freedom
## Multiple R-squared: 0.9947, Adjusted R-squared: 0.9945
## F-statistic: 4530 on 1 and 24 DF, p-value: < 2.2e-16
MODELO1$ residuals
## 1 2 3 4 5 6
## -16.1595618 -7.5200710 -42.7069351 0.9675794 38.9037465 27.3813237
## 7 8 9 10 11 12
## 64.3216988 -66.4526398 -17.6147505 1.4872764 33.3890726 -42.0073802
## 13 14 15 16 17 18
## -13.8602645 45.6123809 61.7643764 63.4579389 -58.2407678 -14.6906119
## 19 20 21 22 23 24
## -80.5875406 0.4001729 -22.3332580 -2.6426679 -14.1199918 -13.9075461
## 25
## 53.5170433
ahora el modelo es significativo y con una Adjusted R-squared: 0.9945 , es decir la variable explica el 99.45% de la biomasa. Acontinuacion se hace el analisis de observaciones atipicas , pues , si ocurre que hay un dato mayor de 3 , este tiene que ser eliminado y volver a ajustar el modelo .
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
summary(influence.measures(MODELO1))
## Potentially influential observations of
## lm(formula = B ~ D2H - 1, data = Muestra1) :
##
## dfb.D2H dffit cov.r cook.d hat
## 8 -0.63 -0.63_* 1.03 0.36 0.11
influencePlot(MODELO1)
## StudRes Hat CookD
## 8 -1.798958 0.10790139 0.3580682
## 16 1.704780 0.10552349 0.3176312
## 17 -1.551544 0.10790139 0.2750397
## 19 -2.155657 0.03716726 0.1557165
la observacion 8 influye en el ajuste del modelo , pero no se necesita eliminar , asi tambien se tienen las observaciones 16,17,19.
### variables
lnD2<-log(D)
lnH2<-log(H)
lnB<-log(B)
lnD3<-log(D)
lnDH2<-log(H)
H3<-H*H*H
lnH<-log(H)
lnD<-log(D)
lnH3<-log(H)
DH<-D*H
lnDH<-log(DH)
## GENERAR VARIABLES Y CORRELACION
Set2<-data.frame(lnB, lnD2, lnH2, lnD3, lnDH2, lnH, lnD,lnH3 ,lnDH ); Set2
## lnB lnD2 lnH2 lnD3 lnDH2 lnH lnD lnH3
## 1 6.013596 1.4799387 2.970784 1.4799387 2.970784 2.970784 1.4799387 2.970784
## 2 5.596009 1.2066454 3.088567 1.2066454 3.088567 3.088567 1.2066454 3.088567
## 3 6.045219 1.4726659 3.074580 1.4726659 3.074580 3.074580 1.4726659 3.074580
## 4 5.130872 0.9715256 3.060396 0.9715256 3.060396 3.060396 0.9715256 3.060396
## 5 6.818708 1.7174710 3.218620 1.7174710 3.218620 3.218620 1.7174710 3.218620
## 6 5.916678 1.2622152 3.193927 1.2622152 3.193927 3.193927 1.2622152 3.193927
## 7 6.813308 1.7059766 3.206350 1.7059766 3.206350 3.206350 1.7059766 3.206350
## 8 6.730554 1.7456419 3.193927 1.7456419 3.193927 3.193927 1.7456419 3.193927
## 9 5.700150 1.2531654 3.129389 1.2531654 3.129389 3.129389 1.2531654 3.129389
## 10 5.130872 1.0070323 2.986288 1.0070323 2.986288 2.986288 1.0070323 2.986288
## 11 6.339687 1.4943274 3.168609 1.4943274 3.168609 3.168609 1.4943274 3.168609
## 12 6.255045 1.5085121 3.193927 1.5085121 3.193927 3.193927 1.5085121 3.193927
## 13 6.444178 1.6278588 3.088567 1.6278588 3.088567 3.088567 1.6278588 3.088567
## 14 5.985081 1.2800728 3.181348 1.2800728 3.181348 3.181348 1.2800728 3.181348
## 15 6.319189 1.4124974 3.254552 1.4124974 3.254552 3.254552 1.4124974 3.254552
## 16 6.864330 1.7400708 3.193927 1.7400708 3.193927 3.193927 1.7400708 3.193927
## 17 6.740310 1.7456419 3.193927 1.7456419 3.193927 3.193927 1.7456419 3.193927
## 18 5.732585 1.2255139 3.206350 1.2255139 3.206350 3.206350 1.2255139 3.206350
## 19 6.109271 1.4430341 3.266248 1.4430341 3.266248 3.266248 1.4430341 3.266248
## 20 5.843251 1.2888835 3.142634 1.2888835 3.142634 3.142634 1.2888835 3.142634
## 21 5.545999 1.2531654 3.001555 1.2531654 3.001555 3.001555 1.2531654 3.001555
## 22 5.121116 1.0300218 2.955035 1.0300218 2.955035 2.955035 1.0300218 2.955035
## 23 5.779347 1.2348162 3.230741 1.2348162 3.230741 3.230741 1.2348162 3.230741
## 24 6.550582 1.6464352 3.155706 1.6464352 3.155706 3.155706 1.6464352 3.155706
## 25 6.390546 1.5294188 3.115966 1.5294188 3.115966 3.115966 1.5294188 3.115966
## lnDH
## 1 4.450722
## 2 4.295212
## 3 4.547246
## 4 4.031921
## 5 4.936091
## 6 4.456142
## 7 4.912326
## 8 4.939569
## 9 4.382554
## 10 3.993320
## 11 4.662937
## 12 4.702439
## 13 4.716426
## 14 4.461421
## 15 4.667049
## 16 4.933998
## 17 4.939569
## 18 4.431864
## 19 4.709282
## 20 4.431517
## 21 4.254721
## 22 3.985057
## 23 4.465557
## 24 4.802141
## 25 4.645384
Corr<-cor(Set2, method = "pearson") # Hacer la correlacion
round(Corr, digits = 3)
## lnB lnD2 lnH2 lnD3 lnDH2 lnH lnD lnH3 lnDH
## lnB 1.000 0.977 0.611 0.977 0.611 0.611 0.977 0.611 0.986
## lnD2 0.977 1.000 0.474 1.000 0.474 0.474 1.000 0.474 0.963
## lnH2 0.611 0.474 1.000 0.474 1.000 1.000 0.474 1.000 0.694
## lnD3 0.977 1.000 0.474 1.000 0.474 0.474 1.000 0.474 0.963
## lnDH2 0.611 0.474 1.000 0.474 1.000 1.000 0.474 1.000 0.694
## lnH 0.611 0.474 1.000 0.474 1.000 1.000 0.474 1.000 0.694
## lnD 0.977 1.000 0.474 1.000 0.474 0.474 1.000 0.474 0.963
## lnH3 0.611 0.474 1.000 0.474 1.000 1.000 0.474 1.000 0.694
## lnDH 0.986 0.963 0.694 0.963 0.694 0.694 0.963 0.694 1.000
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
pairs.panels(x = Set2, ellipses = FALSE, lm = TRUE, method = "pearson", hist.col = "#CD1076")
library(qgraph)
## Registered S3 methods overwritten by 'huge':
## method from
## plot.sim BDgraph
## print.sim BDgraph
Otra_Corr=cor(Set2)
qgraph(Otra_Corr, shape="circle", posCol="darkgreen", negCol="darkred", layout="spring", vsize=10)
lnBiomasa , Tiene alta correlacion con con la mayoria de las variables , la valor de correlacion mas baja es de 0.61 .se puede comprobar con la figura verde , pues las variables lnDH2 , LnH3, lnH2 , son las que se encuantran mas distantes de la variable lnB
regvacia2<-lm(formula = lnB~1, Set2); summary(regvacia2)
##
## Call:
## lm(formula = lnB ~ 1, data = Set2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.95554 -0.34407 -0.03144 0.36752 0.78767
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.0767 0.1069 56.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5345 on 24 degrees of freedom
regcompleta2<-lm(formula = lnB~.,Set2); # B, es la variable dependiente (Biomasa)
summary(regcompleta2)
##
## Call:
## lm(formula = lnB ~ ., data = Set2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.175353 -0.052300 -0.004839 0.064461 0.120418
##
## Coefficients: (6 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.29685 0.57340 -0.518 0.61
## lnD2 1.98394 0.07335 27.046 < 2e-16 ***
## lnH2 1.13849 0.19593 5.811 7.59e-06 ***
## lnD3 NA NA NA NA
## lnDH2 NA NA NA NA
## lnH NA NA NA NA
## lnD NA NA NA NA
## lnH3 NA NA NA NA
## lnDH NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07555 on 22 degrees of freedom
## Multiple R-squared: 0.9817, Adjusted R-squared: 0.98
## F-statistic: 589.7 on 2 and 22 DF, p-value: < 2.2e-16
regforw2<-step(regvacia2, scope = list(lower=regvacia2, upper=regcompleta2),direction = "forward"); summary(regforw2)
## Start: AIC=-30.34
## lnB ~ 1
##
## Df Sum of Sq RSS AIC
## + lnDH 1 6.6608 0.1967 -117.129
## + lnD2 1 6.5392 0.3183 -105.091
## + lnD3 1 6.5392 0.3183 -105.091
## + lnD 1 6.5392 0.3183 -105.091
## + lnH2 1 2.5566 4.3009 -40.001
## + lnDH2 1 2.5566 4.3009 -40.001
## + lnH 1 2.5566 4.3009 -40.001
## + lnH3 1 2.5566 4.3009 -40.001
## <none> 6.8575 -30.338
##
## Step: AIC=-117.13
## lnB ~ lnDH
##
## Df Sum of Sq RSS AIC
## + lnH2 1 0.071084 0.12557 -126.34
## + lnDH2 1 0.071084 0.12557 -126.34
## + lnH 1 0.071084 0.12557 -126.34
## + lnH3 1 0.071084 0.12557 -126.34
## + lnD2 1 0.071084 0.12557 -126.34
## + lnD3 1 0.071084 0.12557 -126.34
## + lnD 1 0.071084 0.12557 -126.34
## <none> 0.19666 -117.13
##
## Step: AIC=-126.34
## lnB ~ lnDH + lnH2
##
## Df Sum of Sq RSS AIC
## <none> 0.12557 -126.34
##
## Call:
## lm(formula = lnB ~ lnDH + lnH2, data = Set2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.175353 -0.052300 -0.004839 0.064461 0.120418
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.29685 0.57340 -0.518 0.60984
## lnDH 1.98394 0.07335 27.046 < 2e-16 ***
## lnH2 -0.84545 0.23957 -3.529 0.00189 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07555 on 22 degrees of freedom
## Multiple R-squared: 0.9817, Adjusted R-squared: 0.98
## F-statistic: 589.7 on 2 and 22 DF, p-value: < 2.2e-16
este modelo muestra que la intercept no es significativa , aunque las variables si lo son y se tiene una Adjusted R-squared: 0.98 ## Hacer un modelo con el procedimiento Stepwise
regstep2<-step(regvacia2, scope = list(lower=regvacia2, upper=regcompleta2),direction = "both"); summary(regstep2)
## Start: AIC=-30.34
## lnB ~ 1
##
## Df Sum of Sq RSS AIC
## + lnDH 1 6.6608 0.1967 -117.129
## + lnD2 1 6.5392 0.3183 -105.091
## + lnD3 1 6.5392 0.3183 -105.091
## + lnD 1 6.5392 0.3183 -105.091
## + lnH2 1 2.5566 4.3009 -40.001
## + lnDH2 1 2.5566 4.3009 -40.001
## + lnH 1 2.5566 4.3009 -40.001
## + lnH3 1 2.5566 4.3009 -40.001
## <none> 6.8575 -30.338
##
## Step: AIC=-117.13
## lnB ~ lnDH
##
## Df Sum of Sq RSS AIC
## + lnH2 1 0.0711 0.1256 -126.343
## + lnDH2 1 0.0711 0.1256 -126.343
## + lnH 1 0.0711 0.1256 -126.343
## + lnH3 1 0.0711 0.1256 -126.343
## + lnD2 1 0.0711 0.1256 -126.343
## + lnD3 1 0.0711 0.1256 -126.343
## + lnD 1 0.0711 0.1256 -126.343
## <none> 0.1967 -117.129
## - lnDH 1 6.6608 6.8575 -30.338
##
## Step: AIC=-126.34
## lnB ~ lnDH + lnH2
##
## Df Sum of Sq RSS AIC
## <none> 0.1256 -126.343
## - lnH2 1 0.0711 0.1967 -117.129
## - lnDH 1 4.1753 4.3009 -40.001
##
## Call:
## lm(formula = lnB ~ lnDH + lnH2, data = Set2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.175353 -0.052300 -0.004839 0.064461 0.120418
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.29685 0.57340 -0.518 0.60984
## lnDH 1.98394 0.07335 27.046 < 2e-16 ***
## lnH2 -0.84545 0.23957 -3.529 0.00189 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07555 on 22 degrees of freedom
## Multiple R-squared: 0.9817, Adjusted R-squared: 0.98
## F-statistic: 589.7 on 2 and 22 DF, p-value: < 2.2e-16
library(pacman)
p_load(bootStepAIC,MASS )
lmFit <-lm(formula = lnB~.,Set2) # El modelo, B = variable dependiente.
boot.stepAIC(regstep2, Set2)
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## lm(formula = lnB ~ lnDH + lnH2, data = Set2)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 2 * df
##
## Covariates selected
## (%)
## lnDH 100
## lnH2 98
##
## Coefficients Sign
## + (%) - (%)
## lnDH 100 0
## lnH2 0 100
##
## Stat Significance
## (%)
## lnDH 100.00
## lnH2 93.88
##
##
## The stepAIC() for the original data-set gave
##
## Call:
## lm(formula = lnB ~ lnDH + lnH2, data = Set2)
##
## Coefficients:
## (Intercept) lnDH lnH2
## -0.2968 1.9839 -0.8454
##
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## lnB ~ lnDH + lnH2
##
## Final Model:
## lnB ~ lnDH + lnH2
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 22 0.1255738 -126.3434
boot.stepAIC(regstep2, Set2, lnB = 100, alpha = 0.05, direction = "forward", # Hacer 100 remuestreos
k = 2, verbose = F)
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## lm(formula = lnB ~ lnDH + lnH2, data = Set2)
##
## Bootstrap samples: 100
## Direction: forward
## Penalty: 2 * df
##
## Covariates selected
## (%)
## lnDH 100
## lnH2 100
##
## Coefficients Sign
## + (%) - (%)
## lnDH 100 0
## lnH2 0 100
##
## Stat Significance
## (%)
## lnDH 100
## lnH2 88
##
##
## The stepAIC() for the original data-set gave
##
## Call:
## lm(formula = lnB ~ lnDH + lnH2, data = Set2)
##
## Coefficients:
## (Intercept) lnDH lnH2
## -0.2968 1.9839 -0.8454
##
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## lnB ~ lnDH + lnH2
##
## Final Model:
## lnB ~ lnDH + lnH2
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 22 0.1255738 -126.3434
finalmente la formula que se obtuvo fue de Final Model: lnB ~ lnDH + lnH2 ## comprobar los supuestos en regresion multiple
MODELO2.1<-lm(lnB ~ lnDH+lnH2, data = Muestra1) # SE TIENE QUE AJUSTAR
summary(MODELO2.1)
##
## Call:
## lm(formula = lnB ~ lnDH + lnH2, data = Muestra1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.175353 -0.052300 -0.004839 0.064461 0.120418
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.29685 0.57340 -0.518 0.60984
## lnDH 1.98394 0.07335 27.046 < 2e-16 ***
## lnH2 -0.84545 0.23957 -3.529 0.00189 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07555 on 22 degrees of freedom
## Multiple R-squared: 0.9817, Adjusted R-squared: 0.98
## F-statistic: 589.7 on 2 and 22 DF, p-value: < 2.2e-16
EL MODELO NO TIENE UNA INTERCEPCION SIGNIFICATIVA POR LO QUE SE TIENE QUE AJUSTAR Acontinuacion se presenta el modelo ajustado ,las variables son significativas y Adjusted R-squared: 0.9999 muy alto .
## MODELOS AJUSTADOS
MODELO2<-lm(lnB ~ lnDH+lnH2-1, data = Muestra1)
summary(MODELO2)
##
## Call:
## lm(formula = lnB ~ lnDH + lnH2 - 1, data = Muestra1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16320 -0.05116 -0.01278 0.06620 0.12610
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lnDH 1.99639 0.06818 29.279 < 2e-16 ***
## lnH2 -0.95801 0.09900 -9.677 1.42e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07434 on 23 degrees of freedom
## Multiple R-squared: 0.9999, Adjusted R-squared: 0.9999
## F-statistic: 8.413e+04 on 2 and 23 DF, p-value: < 2.2e-16
MODELO2$ residuals
## 1 2 3 4 5 6
## -0.025750411 -0.020040265 -0.087388715 0.013465457 0.047805812 0.080284630
## 7 8 9 10 11 12
## 0.078093587 -0.070948422 -0.051161003 0.019532627 0.066196785 -0.073053507
## 13 14 15 16 17 18
## -0.012778619 0.126098150 0.119821895 0.073950025 -0.061192247 -0.043437442
## 19 20 21 22 23 24
## -0.163204107 0.006878959 -0.072571758 -0.003667531 -0.040574548 -0.013176269
## 25
## 0.101663522
ahora que el modelo ha sido corregido ,es significativo , y tiene como Multiple R-squared: 0.9999, Adjusted R-squared: 0.9999 ,que son valores muy altos .
library(lmtest)
library(car)
summary(influence.measures(MODELO2))
## Potentially influential observations of
## lm(formula = lnB ~ lnDH + lnH2 - 1, data = Muestra1) :
##
## dfb.lnDH dfb.lnH2 dffit cov.r cook.d hat
## 4 -0.08 0.08 0.09 1.32_* 0.00 0.18
## 19 0.06 -0.09 -0.53 0.70_* 0.12 0.04
influencePlot(MODELO2)
## StudRes Hat CookD
## 4 0.1954437 0.17696698 0.004285882
## 10 0.2763093 0.13203974 0.006050155
## 14 1.8384207 0.06058381 0.098763877
## 19 -2.4850911 0.04391987 0.115791015
se planteo tranajar de esta forma , primero observe que mi intercept sea significativa , si no lo era se ajustaba , y tambien se calculo los influyentes , son importantes pues influyen a que se vuelva a corregir un modelo .asi que todos los modelos tendran estas dos consideraciones para ajustarse . en este set 2 , no hay necesidad de volverlo ajustar otra vez pues no hay ningun valor mayor de 3 , se encuentra que el valor 4 influye en la coovarianza , pero no sobrepasa mas de 3 , asi que no es necesario eliminarlo e igualmente los datos 8,10,14 .
B
## [1] 408.9512 269.3494 422.0902 169.1646 914.8025 371.1766 909.8754 837.6109
## [9] 298.9121 169.1646 566.6192 520.6327 629.0294 397.4546 555.1225 957.5043
## [17] 845.8228 308.7664 450.0106 344.8986 256.2104 167.5222 323.5478 699.6515
## [25] 596.1819
lnD2H<-log(D)
lnD<-log(D)
lnH<-log(H)
lnDH<-log(DH)
lnDH3<-log(D)
lnD3<-log(D)
DH3<-D*H3
Set3<-data.frame(B, lnD2H, lnD, lnH, lnDH, lnDH3, lnD3 ); Set3
## B lnD2H lnD lnH lnDH lnDH3 lnD3
## 1 408.9512 1.4799387 1.4799387 2.970784 4.450722 1.4799387 1.4799387
## 2 269.3494 1.2066454 1.2066454 3.088567 4.295212 1.2066454 1.2066454
## 3 422.0902 1.4726659 1.4726659 3.074580 4.547246 1.4726659 1.4726659
## 4 169.1646 0.9715256 0.9715256 3.060396 4.031921 0.9715256 0.9715256
## 5 914.8025 1.7174710 1.7174710 3.218620 4.936091 1.7174710 1.7174710
## 6 371.1766 1.2622152 1.2622152 3.193927 4.456142 1.2622152 1.2622152
## 7 909.8754 1.7059766 1.7059766 3.206350 4.912326 1.7059766 1.7059766
## 8 837.6109 1.7456419 1.7456419 3.193927 4.939569 1.7456419 1.7456419
## 9 298.9121 1.2531654 1.2531654 3.129389 4.382554 1.2531654 1.2531654
## 10 169.1646 1.0070323 1.0070323 2.986288 3.993320 1.0070323 1.0070323
## 11 566.6192 1.4943274 1.4943274 3.168609 4.662937 1.4943274 1.4943274
## 12 520.6327 1.5085121 1.5085121 3.193927 4.702439 1.5085121 1.5085121
## 13 629.0294 1.6278588 1.6278588 3.088567 4.716426 1.6278588 1.6278588
## 14 397.4546 1.2800728 1.2800728 3.181348 4.461421 1.2800728 1.2800728
## 15 555.1225 1.4124974 1.4124974 3.254552 4.667049 1.4124974 1.4124974
## 16 957.5043 1.7400708 1.7400708 3.193927 4.933998 1.7400708 1.7400708
## 17 845.8228 1.7456419 1.7456419 3.193927 4.939569 1.7456419 1.7456419
## 18 308.7664 1.2255139 1.2255139 3.206350 4.431864 1.2255139 1.2255139
## 19 450.0106 1.4430341 1.4430341 3.266248 4.709282 1.4430341 1.4430341
## 20 344.8986 1.2888835 1.2888835 3.142634 4.431517 1.2888835 1.2888835
## 21 256.2104 1.2531654 1.2531654 3.001555 4.254721 1.2531654 1.2531654
## 22 167.5222 1.0300218 1.0300218 2.955035 3.985057 1.0300218 1.0300218
## 23 323.5478 1.2348162 1.2348162 3.230741 4.465557 1.2348162 1.2348162
## 24 699.6515 1.6464352 1.6464352 3.155706 4.802141 1.6464352 1.6464352
## 25 596.1819 1.5294188 1.5294188 3.115966 4.645384 1.5294188 1.5294188
Corr<-cor(Set3, method = "pearson")
round(Corr, digits = 3)
## B lnD2H lnD lnH lnDH lnDH3 lnD3
## B 1.000 0.950 0.950 0.536 0.941 0.950 0.950
## lnD2H 0.950 1.000 1.000 0.474 0.963 1.000 1.000
## lnD 0.950 1.000 1.000 0.474 0.963 1.000 1.000
## lnH 0.536 0.474 0.474 1.000 0.694 0.474 0.474
## lnDH 0.941 0.963 0.963 0.694 1.000 0.963 0.963
## lnDH3 0.950 1.000 1.000 0.474 0.963 1.000 1.000
## lnD3 0.950 1.000 1.000 0.474 0.963 1.000 1.000
library(psych)
pairs.panels(x = Set3, ellipses = FALSE, lm = TRUE, method = "pearson", hist.col = "#CD1076")
biomasa tiene buena correlacion con la mayoria de las variables , la mas baja es lnH , que tambien se puede observar en figura verde ,lnH es la mas mas lejana a biomasa
Otra_Corr=cor(Set3)
library(qgraph)
qgraph(Otra_Corr, shape="circle", posCol="darkgreen", negCol="darkred", layout="spring", vsize=10)
regvacia3<-lm(formula = B~1, Set3); summary(regvacia3)
##
## Call:
## lm(formula = B ~ 1, data = Set3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -328.08 -186.84 -73.51 133.43 461.90
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 495.60 49.57 9.998 4.94e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 247.9 on 24 degrees of freedom
regcompleta3<-lm(formula = B~.,Set3);
summary(regcompleta3)
##
## Call:
## lm(formula = B ~ ., data = Set3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -114.11 -46.38 -15.47 23.04 138.61
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1779.65 581.17 -3.062 0.00571 **
## lnD2H 932.14 74.35 12.538 1.71e-11 ***
## lnD NA NA NA NA
## lnH 305.75 198.58 1.540 0.13790
## lnDH NA NA NA NA
## lnDH3 NA NA NA NA
## lnD3 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.57 on 22 degrees of freedom
## Multiple R-squared: 0.9125, Adjusted R-squared: 0.9046
## F-statistic: 114.7 on 2 and 22 DF, p-value: 2.3e-12
regforw3<-step(regvacia3, scope = list(lower=regvacia3, upper=regcompleta3),direction = "forward"); summary(regforw3)
## Start: AIC=276.62
## B ~ 1
##
## Df Sum of Sq RSS AIC
## + lnD2H 1 1331470 142900 220.28
## + lnD 1 1331470 142900 220.28
## + lnDH3 1 1331470 142900 220.28
## + lnD3 1 1331470 142900 220.28
## + lnDH 1 1306351 168019 224.32
## + lnH 1 423655 1050715 270.15
## <none> 1474370 276.62
##
## Step: AIC=220.28
## B ~ lnD2H
##
## Df Sum of Sq RSS AIC
## + lnDH 1 13901 128999 219.72
## + lnH 1 13901 128999 219.72
## <none> 142900 220.28
##
## Step: AIC=219.72
## B ~ lnD2H + lnDH
##
## Df Sum of Sq RSS AIC
## <none> 128999 219.72
##
## Call:
## lm(formula = B ~ lnD2H + lnDH, data = Set3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -114.11 -46.38 -15.47 23.04 138.61
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1779.7 581.2 -3.062 0.00571 **
## lnD2H 626.4 242.8 2.580 0.01710 *
## lnDH 305.8 198.6 1.540 0.13790
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.57 on 22 degrees of freedom
## Multiple R-squared: 0.9125, Adjusted R-squared: 0.9046
## F-statistic: 114.7 on 2 and 22 DF, p-value: 2.3e-12
regstep3<-step(regvacia3, scope = list(lower=regvacia3, upper=regcompleta3),direction = "both"); summary(regstep3)
## Start: AIC=276.62
## B ~ 1
##
## Df Sum of Sq RSS AIC
## + lnD2H 1 1331470 142900 220.28
## + lnD 1 1331470 142900 220.28
## + lnDH3 1 1331470 142900 220.28
## + lnD3 1 1331470 142900 220.28
## + lnDH 1 1306351 168019 224.32
## + lnH 1 423655 1050715 270.15
## <none> 1474370 276.62
##
## Step: AIC=220.28
## B ~ lnD2H
##
## Df Sum of Sq RSS AIC
## + lnDH 1 13901 128999 219.72
## + lnH 1 13901 128999 219.72
## <none> 142900 220.28
## - lnD2H 1 1331470 1474370 276.62
##
## Step: AIC=219.72
## B ~ lnD2H + lnDH
##
## Df Sum of Sq RSS AIC
## <none> 128999 219.72
## - lnDH 1 13901 142900 220.28
## - lnD2H 1 39019 168019 224.32
##
## Call:
## lm(formula = B ~ lnD2H + lnDH, data = Set3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -114.11 -46.38 -15.47 23.04 138.61
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1779.7 581.2 -3.062 0.00571 **
## lnD2H 626.4 242.8 2.580 0.01710 *
## lnDH 305.8 198.6 1.540 0.13790
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.57 on 22 degrees of freedom
## Multiple R-squared: 0.9125, Adjusted R-squared: 0.9046
## F-statistic: 114.7 on 2 and 22 DF, p-value: 2.3e-12
lmFit <-lm(formula = B~.,Set3) # El modelo, B = variable dependiente.
boot.stepAIC(regstep3, Set3)
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## lm(formula = B ~ lnD2H + lnDH, data = Set3)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 2 * df
##
## Covariates selected
## (%)
## lnD2H 94
## lnDH 62
##
## Coefficients Sign
## + (%) - (%)
## lnD2H 100.00 0.00
## lnDH 98.39 1.61
##
## Stat Significance
## (%)
## lnD2H 76.60
## lnDH 56.45
##
##
## The stepAIC() for the original data-set gave
##
## Call:
## lm(formula = B ~ lnD2H + lnDH, data = Set3)
##
## Coefficients:
## (Intercept) lnD2H lnDH
## -1779.7 626.4 305.8
##
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## B ~ lnD2H + lnDH
##
## Final Model:
## B ~ lnD2H + lnDH
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 22 128999.4 219.7172
boot.stepAIC(regstep3, Set3, B = 100, alpha = 0.05, direction = "forward", # Hacer 100 remuestreos
k = 2, verbose = T)
##
## 1 replicate finished
## 2 replicate finished
## 3 replicate finished
## 4 replicate finished
## 5 replicate finished
## 6 replicate finished
## 7 replicate finished
## 8 replicate finished
## 9 replicate finished
## 10 replicate finished
## 11 replicate finished
## 12 replicate finished
## 13 replicate finished
## 14 replicate finished
## 15 replicate finished
## 16 replicate finished
## 17 replicate finished
## 18 replicate finished
## 19 replicate finished
## 20 replicate finished
## 21 replicate finished
## 22 replicate finished
## 23 replicate finished
## 24 replicate finished
## 25 replicate finished
## 26 replicate finished
## 27 replicate finished
## 28 replicate finished
## 29 replicate finished
## 30 replicate finished
## 31 replicate finished
## 32 replicate finished
## 33 replicate finished
## 34 replicate finished
## 35 replicate finished
## 36 replicate finished
## 37 replicate finished
## 38 replicate finished
## 39 replicate finished
## 40 replicate finished
## 41 replicate finished
## 42 replicate finished
## 43 replicate finished
## 44 replicate finished
## 45 replicate finished
## 46 replicate finished
## 47 replicate finished
## 48 replicate finished
## 49 replicate finished
## 50 replicate finished
## 51 replicate finished
## 52 replicate finished
## 53 replicate finished
## 54 replicate finished
## 55 replicate finished
## 56 replicate finished
## 57 replicate finished
## 58 replicate finished
## 59 replicate finished
## 60 replicate finished
## 61 replicate finished
## 62 replicate finished
## 63 replicate finished
## 64 replicate finished
## 65 replicate finished
## 66 replicate finished
## 67 replicate finished
## 68 replicate finished
## 69 replicate finished
## 70 replicate finished
## 71 replicate finished
## 72 replicate finished
## 73 replicate finished
## 74 replicate finished
## 75 replicate finished
## 76 replicate finished
## 77 replicate finished
## 78 replicate finished
## 79 replicate finished
## 80 replicate finished
## 81 replicate finished
## 82 replicate finished
## 83 replicate finished
## 84 replicate finished
## 85 replicate finished
## 86 replicate finished
## 87 replicate finished
## 88 replicate finished
## 89 replicate finished
## 90 replicate finished
## 91 replicate finished
## 92 replicate finished
## 93 replicate finished
## 94 replicate finished
## 95 replicate finished
## 96 replicate finished
## 97 replicate finished
## 98 replicate finished
## 99 replicate finished
## 100 replicate finished
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## lm(formula = B ~ lnD2H + lnDH, data = Set3)
##
## Bootstrap samples: 100
## Direction: forward
## Penalty: 2 * df
##
## Covariates selected
## (%)
## lnD2H 100
## lnDH 100
##
## Coefficients Sign
## + (%) - (%)
## lnD2H 100 0
## lnDH 95 5
##
## Stat Significance
## (%)
## lnD2H 67
## lnDH 41
##
##
## The stepAIC() for the original data-set gave
##
## Call:
## lm(formula = B ~ lnD2H + lnDH, data = Set3)
##
## Coefficients:
## (Intercept) lnD2H lnDH
## -1779.7 626.4 305.8
##
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## B ~ lnD2H + lnDH
##
## Final Model:
## B ~ lnD2H + lnDH
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 22 128999.4 219.7172
Final Model: B ~ lnD2H + lnDH
## ajuste de modelo
MODELO3<-lm(B ~ lnD2H+lnDH, data = Muestra1) # SIGNIFICATIVO
summary(MODELO3)
##
## Call:
## lm(formula = B ~ lnD2H + lnDH, data = Muestra1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -114.11 -46.38 -15.47 23.04 138.61
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1779.7 581.2 -3.062 0.00571 **
## lnD2H 626.4 242.8 2.580 0.01710 *
## lnDH 305.8 198.6 1.540 0.13790
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.57 on 22 degrees of freedom
## Multiple R-squared: 0.9125, Adjusted R-squared: 0.9046
## F-statistic: 114.7 on 2 and 22 DF, p-value: 2.3e-12
MODELO3$ residuals
## 1 2 3 4 5 6
## -99.235732 -20.102386 -111.053723 107.490701 109.425427 -2.288401
## 7 8 9 10 11 12
## 118.964300 13.524470 -46.384289 97.052248 -15.465997 -82.415554
## 13 14 15 16 17 18
## -53.052434 11.189789 23.037253 138.610797 21.736342 -34.286033
## 19 20 21 22 23 24
## -114.115322 -37.741842 -50.000541 83.535987 -35.633567 -20.274200
## 25
## -2.517292
la intercept es significativa y la variable lnD2H tambien , Adjusted R-squared: 0.9046 es decir este modelo explica el 90.46% de la biomasa
library(lmtest)
library(car)
summary(influence.measures(MODELO3))
## Potentially influential observations of
## lm(formula = B ~ lnD2H + lnDH, data = Muestra1) :
## NONE
## numeric(0)
influencePlot(MODELO3)
## StudRes Hat CookD
## 1 -1.556575 0.2620115 0.2693216
## 4 1.606181 0.1813363 0.1777172
## 7 1.712845 0.1050029 0.1054642
## 16 2.067082 0.1190629 0.1675693
## 22 1.265875 0.2369847 0.1614785
deacuerdo a lo anterior se tienen los valores de 1, 4, ,7 ,16, y 22, tienen influencia pero no sobrepasa el valor de 3 , asi que no es necesario eliminarlo.
#*VARIABLES*
B
## [1] 408.9512 269.3494 422.0902 169.1646 914.8025 371.1766 909.8754 837.6109
## [9] 298.9121 169.1646 566.6192 520.6327 629.0294 397.4546 555.1225 957.5043
## [17] 845.8228 308.7664 450.0106 344.8986 256.2104 167.5222 323.5478 699.6515
## [25] 596.1819
lnD2<-log(D)
lnDH<-log(D)
lnD2H<-log(D)
D3H<-D3*H
H2D2<-H2*D2
D
## [1] 4.392676 3.342254 4.360845 2.641972 5.570423 3.533240 5.506761 5.729578
## [9] 3.501409 2.737465 4.456338 4.520000 5.092958 3.596902 4.106198 5.697747
## [17] 5.729578 3.405916 4.233521 3.628733 3.501409 2.801127 3.437747 5.188451
## [25] 4.615493
H
## [1] 19.5072 21.9456 21.6408 21.3360 24.9936 24.3840 24.6888 24.3840 22.8600
## [10] 19.8120 23.7744 24.3840 21.9456 24.0792 25.9080 24.3840 24.3840 24.6888
## [19] 26.2128 23.1648 20.1168 19.2024 25.2984 23.4696 22.5552
Set4<-data.frame(B, lnD2, lnDH, lnD2H, D3H, H2D2,D,H ); Set4
## B lnD2 lnDH lnD2H D3H H2D2 D H
## 1 408.9512 1.4799387 1.4799387 1.4799387 1653.4177 7342.573 4.392676 19.5072
## 2 269.3494 1.2066454 1.2066454 1.2066454 819.3430 5379.895 3.342254 21.9456
## 3 422.0902 1.4726659 1.4726659 1.4726659 1794.6733 8906.109 4.360845 21.6408
## 4 169.1646 0.9715256 0.9715256 0.9715256 393.4573 3177.477 2.641972 21.3360
## 5 914.8025 1.7174710 1.7174710 1.7174710 4320.0955 19383.580 5.570423 24.9936
## 6 371.1766 1.2622152 1.2622152 1.2622152 1075.5343 7422.601 3.533240 24.3840
## 7 909.8754 1.7059766 1.7059766 1.7059766 4122.7659 18483.850 5.506761 24.6888
## 8 837.6109 1.7456419 1.7456419 1.7456419 4586.4097 19518.892 5.729578 24.3840
## 9 298.9121 1.2531654 1.2531654 1.2531654 981.3065 6406.754 3.501409 22.8600
## 10 169.1646 1.0070323 1.0070323 1.0070323 406.4190 2941.398 2.737465 19.8120
## 11 566.6192 1.4943274 1.4943274 1.4943274 2103.9919 11224.718 4.456338 23.7744
## 12 520.6327 1.5085121 1.5085121 1.5085121 2251.7510 12147.498 4.520000 24.3840
## 13 629.0294 1.6278588 1.6278588 1.6278588 2899.0639 12492.091 5.092958 21.9456
## 14 397.4546 1.2800728 1.2800728 1.2800728 1120.5410 7501.381 3.596902 24.0792
## 15 555.1225 1.4124974 1.4124974 1.4124974 1793.7148 11317.420 4.106198 25.9080
## 16 957.5043 1.7400708 1.7400708 1.7400708 4510.3934 19302.618 5.697747 24.3840
## 17 845.8228 1.7456419 1.7456419 1.7456419 4586.4097 19518.892 5.729578 24.3840
## 18 308.7664 1.2255139 1.2255139 1.2255139 975.4426 7070.787 3.405916 24.6888
## 19 450.0106 1.4430341 1.4430341 1.4430341 1988.9264 12314.885 4.233521 26.2128
## 20 344.8986 1.2888835 1.2888835 1.2888835 1106.8620 7065.893 3.628733 23.1648
## 21 256.2104 1.2531654 1.2531654 1.2531654 863.5497 4961.391 3.501409 20.1168
## 22 167.5222 1.0300218 1.0300218 1.0300218 422.0403 2893.188 2.801127 19.2024
## 23 323.5478 1.2348162 1.2348162 1.2348162 1027.8144 7563.693 3.437747 25.2984
## 24 699.6515 1.6464352 1.6464352 1.6464352 3278.0750 14828.146 5.188451 23.4696
## 25 596.1819 1.5294188 1.5294188 1.5294188 2217.6912 10837.513 4.615493 22.5552
Corr<-cor(Set4, method = "pearson") # Hacer la correlacion
round(Corr, digits = 3)
## B lnD2 lnDH lnD2H D3H H2D2 D H
## B 1.000 0.950 0.950 0.950 0.980 0.982 0.968 0.530
## lnD2 0.950 1.000 1.000 1.000 0.946 0.945 0.994 0.467
## lnDH 0.950 1.000 1.000 1.000 0.946 0.945 0.994 0.467
## lnD2H 0.950 1.000 1.000 1.000 0.946 0.945 0.994 0.467
## D3H 0.980 0.946 0.946 0.946 1.000 0.984 0.974 0.463
## H2D2 0.982 0.945 0.945 0.945 0.984 1.000 0.963 0.604
## D 0.968 0.994 0.994 0.994 0.974 0.963 1.000 0.441
## H 0.530 0.467 0.467 0.467 0.463 0.604 0.441 1.000
library(psych)
pairs.panels(x = Set4, ellipses = FALSE, lm = TRUE, method = "pearson", hist.col = "#CD1076")
library(qgraph)
biomasa tiene alta correlacion con la mayoria de las variables unicamente , la variable H(altura) , es la mas baja con 0.53,este se comprueba tambien con la figura verde .
Otra_Corr=cor(Set4)
library(qgraph)
qgraph(Otra_Corr, shape="circle", posCol="darkgreen", negCol="darkred", layout="spring", vsize=10)
regvacia4<-lm(formula = B~1, Set4); summary(regvacia4)
##
## Call:
## lm(formula = B ~ 1, data = Set4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -328.08 -186.84 -73.51 133.43 461.90
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 495.60 49.57 9.998 4.94e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 247.9 on 24 degrees of freedom
regcompleta4<-lm(formula = B~.,Set4);
summary(regcompleta4)
##
## Call:
## lm(formula = B ~ ., data = Set4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -91.935 -15.642 -1.829 32.251 62.102
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.181e+01 4.340e+02 -0.119 0.906
## lnD2 -4.878e+02 1.677e+03 -0.291 0.774
## lnDH NA NA NA NA
## lnD2H NA NA NA NA
## D3H -7.097e-03 2.093e-01 -0.034 0.973
## H2D2 2.777e-02 3.955e-02 0.702 0.491
## D 2.216e+02 5.724e+02 0.387 0.703
## H 1.212e+00 2.031e+01 0.060 0.953
##
## Residual standard error: 45.75 on 19 degrees of freedom
## Multiple R-squared: 0.973, Adjusted R-squared: 0.9659
## F-statistic: 137.1 on 5 and 19 DF, p-value: 3.164e-14
regforw4<-step(regvacia4, scope = list(lower=regvacia4, upper=regcompleta4),direction = "forward"); summary(regforw4)
## Start: AIC=276.62
## B ~ 1
##
## Df Sum of Sq RSS AIC
## + H2D2 1 1422292 52078 195.04
## + D3H 1 1416589 57781 197.64
## + D 1 1381912 92458 209.39
## + lnD2 1 1331470 142900 220.28
## + lnDH 1 1331470 142900 220.28
## + lnD2H 1 1331470 142900 220.28
## + H 1 414442 1059928 270.37
## <none> 1474370 276.62
##
## Step: AIC=195.04
## B ~ H2D2
##
## Df Sum of Sq RSS AIC
## + D 1 10251.0 41827 191.56
## + H 1 9282.7 42795 192.13
## + D3H 1 8486.3 43592 192.59
## + lnD2 1 6683.8 45394 193.61
## + lnDH 1 6683.8 45394 193.61
## + lnD2H 1 6683.8 45394 193.61
## <none> 52078 195.04
##
## Step: AIC=191.56
## B ~ H2D2 + D
##
## Df Sum of Sq RSS AIC
## <none> 41827 191.56
## + lnD2 1 2050.3 39777 192.30
## + lnDH 1 2050.3 39777 192.30
## + lnD2H 1 2050.3 39777 192.30
## + D3H 1 1887.9 39939 192.41
## + H 1 1597.8 40229 192.59
##
## Call:
## lm(formula = B ~ H2D2 + D, data = Set4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -106.635 -20.793 -1.803 26.534 70.215
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.544e+02 8.279e+01 -1.865 0.0756 .
## H2D2 3.105e-02 6.018e-03 5.160 3.58e-05 ***
## D 7.761e+01 3.342e+01 2.322 0.0299 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 43.6 on 22 degrees of freedom
## Multiple R-squared: 0.9716, Adjusted R-squared: 0.9691
## F-statistic: 376.7 on 2 and 22 DF, p-value: < 2.2e-16
regstep4<-step(regvacia4, scope = list(lower=regvacia4, upper=regcompleta4),direction = "both"); summary(regstep4)
## Start: AIC=276.62
## B ~ 1
##
## Df Sum of Sq RSS AIC
## + H2D2 1 1422292 52078 195.04
## + D3H 1 1416589 57781 197.64
## + D 1 1381912 92458 209.39
## + lnD2 1 1331470 142900 220.28
## + lnDH 1 1331470 142900 220.28
## + lnD2H 1 1331470 142900 220.28
## + H 1 414442 1059928 270.37
## <none> 1474370 276.62
##
## Step: AIC=195.04
## B ~ H2D2
##
## Df Sum of Sq RSS AIC
## + D 1 10251 41827 191.56
## + H 1 9283 42795 192.13
## + D3H 1 8486 43592 192.59
## + lnD2 1 6684 45394 193.61
## + lnDH 1 6684 45394 193.61
## + lnD2H 1 6684 45394 193.61
## <none> 52078 195.04
## - H2D2 1 1422292 1474370 276.62
##
## Step: AIC=191.56
## B ~ H2D2 + D
##
## Df Sum of Sq RSS AIC
## <none> 41827 191.56
## + lnD2 1 2050 39777 192.30
## + lnDH 1 2050 39777 192.30
## + lnD2H 1 2050 39777 192.30
## + D3H 1 1888 39939 192.41
## + H 1 1598 40229 192.59
## - D 1 10251 52078 195.04
## - H2D2 1 50630 92458 209.39
##
## Call:
## lm(formula = B ~ H2D2 + D, data = Set4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -106.635 -20.793 -1.803 26.534 70.215
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.544e+02 8.279e+01 -1.865 0.0756 .
## H2D2 3.105e-02 6.018e-03 5.160 3.58e-05 ***
## D 7.761e+01 3.342e+01 2.322 0.0299 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 43.6 on 22 degrees of freedom
## Multiple R-squared: 0.9716, Adjusted R-squared: 0.9691
## F-statistic: 376.7 on 2 and 22 DF, p-value: < 2.2e-16
Aambos analisis muestran los mismos resultados ## Generar un modelo con Boostrap forward
#set4
lmFit <-lm(formula = B~.,Set4)
boot.stepAIC(regstep4, Set4)
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## lm(formula = B ~ H2D2 + D, data = Set4)
##
## Bootstrap samples: 100
## Direction: backward
## Penalty: 2 * df
##
## Covariates selected
## (%)
## H2D2 99
## D 90
##
## Coefficients Sign
## + (%) - (%)
## D 100 0
## H2D2 100 0
##
## Stat Significance
## (%)
## H2D2 93.94
## D 74.44
##
##
## The stepAIC() for the original data-set gave
##
## Call:
## lm(formula = B ~ H2D2 + D, data = Set4)
##
## Coefficients:
## (Intercept) H2D2 D
## -154.36426 0.03105 77.61324
##
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## B ~ H2D2 + D
##
## Final Model:
## B ~ H2D2 + D
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 22 41827.1 191.5606
boot.stepAIC(regstep4, Set4, B = 100, alpha = 0.05, direction = "forward",
k = 2, verbose = F)
##
## Summary of Bootstrapping the 'stepAIC()' procedure for
##
## Call:
## lm(formula = B ~ H2D2 + D, data = Set4)
##
## Bootstrap samples: 100
## Direction: forward
## Penalty: 2 * df
##
## Covariates selected
## (%)
## D 100
## H2D2 100
##
## Coefficients Sign
## + (%) - (%)
## D 100 0
## H2D2 100 0
##
## Stat Significance
## (%)
## H2D2 95
## D 67
##
##
## The stepAIC() for the original data-set gave
##
## Call:
## lm(formula = B ~ H2D2 + D, data = Set4)
##
## Coefficients:
## (Intercept) H2D2 D
## -154.36426 0.03105 77.61324
##
##
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## B ~ H2D2 + D
##
## Final Model:
## B ~ H2D2 + D
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 22 41827.1 191.5606
MODELO4<-lm(B ~ H2D2+D, data = Muestra1)
summary(MODELO4)
##
## Call:
## lm(formula = B ~ H2D2 + D, data = Muestra1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -106.635 -20.793 -1.803 26.534 70.215
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.544e+02 8.279e+01 -1.865 0.0756 .
## H2D2 3.105e-02 6.018e-03 5.160 3.58e-05 ***
## D 7.761e+01 3.342e+01 2.322 0.0299 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 43.6 on 22 degrees of freedom
## Multiple R-squared: 0.9716, Adjusted R-squared: 0.9691
## F-statistic: 376.7 on 2 and 22 DF, p-value: < 2.2e-16
MODELO4$ residuals
## 1 2 3 4 5 6
## -5.6344090 -2.7594937 -38.5797448 19.8018045 34.8806015 20.8094341
## 7 8 9 10 11 12
## 62.8351699 -58.8655913 -17.4379151 19.7216100 26.5344011 -53.0495689
## 13 14 15 16 17 18
## 0.1768685 39.6999218 39.3345325 70.2145341 -50.6537193 -20.7933673
## 19 20 21 22 23 24
## -106.6352313 -1.8026894 -15.2545816 14.6353685 -23.7894409 -9.1575857
## 25
## 55.7690917
rm(list=ls(all=TRUE))
setwd("C:/tarea2/tarea2")
Muestra3 <- read.csv("Muestra3.csv")
attach(Muestra3[,2:5])
## The following objects are masked from Muestra1[, 2:5]:
##
## B, D, H, V
head(Muestra3[,2:5])
## D H V B
## 1 4.392676 19.5072 0.7050883 408.9512
## 2 3.342254 21.9456 0.4643955 269.3494
## 3 4.360845 21.6408 0.7277418 422.0902
## 4 2.641972 21.3360 0.2916630 169.1646
## 5 5.570423 24.9936 1.5772458 914.8025
## 6 3.533240 24.3840 0.6399597 371.1766
B
## [1] 408.9512 269.3494 422.0902 169.1646 914.8025 371.1766 909.8754 837.6109
## [9] 298.9121 169.1646 566.6192 520.6327 629.0294 397.4546 555.1225 957.5043
## [17] 845.8228 308.7664 344.8986 256.2104 167.5222 323.5478 699.6515 596.1819
D2<-D*D
H2<-H*H
H2D2<-H2*D2
D
## [1] 4.392676 3.342254 4.360845 2.641972 5.570423 3.533240 5.506761 5.729578
## [9] 3.501409 2.737465 4.456338 4.520000 5.092958 3.596902 4.106198 5.697747
## [17] 5.729578 3.405916 3.628733 3.501409 2.801127 3.437747 5.188451 4.615493
H
## [1] 19.5072 21.9456 21.6408 21.3360 24.9936 24.3840 24.6888 24.3840 22.8600
## [10] 19.8120 23.7744 24.3840 21.9456 24.0792 25.9080 24.3840 24.3840 24.6888
## [19] 23.1648 20.1168 19.2024 25.2984 23.4696 22.5552
MODELO4.1<-lm(B ~ H2D2+D-1, data = Muestra3)
summary(MODELO4.1)
##
## Call:
## lm(formula = B ~ H2D2 + D - 1, data = Muestra3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -68.873 -21.287 -0.661 25.785 69.467
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## H2D2 0.041651 0.002485 16.764 5.14e-14 ***
## D 16.319268 6.704469 2.434 0.0235 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38.02 on 22 degrees of freedom
## Multiple R-squared: 0.9957, Adjusted R-squared: 0.9953
## F-statistic: 2553 on 2 and 22 DF, p-value: < 2.2e-16
MODELO4.1$ residuals
## 1 2 3 4 5 6 7
## 31.440237 -9.271870 -20.024174 -6.295676 16.551324 4.357778 50.137799
## 8 9 10 11 12 13 14
## -68.873471 -25.076187 1.978890 26.373939 -59.086184 25.607631 26.315563
## 15 16 17 18 19 20 21
## 16.730226 60.547358 -60.661599 -41.321216 -8.621343 -7.577035 1.305604
## 22 23 24
## -47.589321 -2.627715 69.466898
este modelo se ajusto dos veces , uno por la intercept y otro por un dato atipico(num.19), se ha corregido como se deberia , la estadistica nos arroja que las variables son significativas , y que tiene una Rcuadrada ajustada de 0.9953 es decir el 99.53 % .que es muy alto .
library(lmtest)
library(car)
summary(influence.measures(MODELO4.1))
## Potentially influential observations of
## lm(formula = B ~ H2D2 + D - 1, data = Muestra3) :
##
## dfb.H2D2 dfb.D dffit cov.r cook.d hat
## 5 0.17 -0.12 0.21 1.30_* 0.02 0.17
## 8 -0.71 0.51 -0.95_* 0.89 0.39 0.16
influencePlot(MODELO4.1)
## StudRes Hat CookD
## 5 0.4697941 0.17163073 0.02370386
## 8 -2.1362192 0.16430976 0.38608524
## 17 -1.8372820 0.16430976 0.29950699
## 24 2.0021860 0.05321191 0.09909817
lo anterior muestra que efectivamente ya no hay datos atipicos .el modelo ya es correcto .
se trabajo de la siguinte manera , para comprobobar los supuestos , se organizaron los resultados que buscamos en unas tablas ,para poder comparar mejor los modelos y porteriormente escoger el mejor .
VARIANZA1.jpg
EL MODELO 2,3 son los que tienen un p-value alto , en comparacion a los demas , es decir la probabilidad de que los errores se distribuyan homogeneamente es alta .
LINEALIDAD1.jpg
DISTRIBUCION1.jpg
El p-value es muy alto en los modelos 2 y 4, es decir; la probabilidad es muy alta que los errores se distribuyan normal.
AUTOCOR1.jpg
un valor cercano a 2, demuestra ausencia de autocorrelación o independencia de los residuales (errores).en este caso el unico modelo mas de 2 , es el modelo 4 .
para concluir , se observa que los modelos 2 y 3 , cumplen por lo menos la mayoria de los supuestos , no los cumplen todos , pero escojo el modelo 2 , pues este modelo tiene mas supuestos cumplidos ademas que ,si considera su Rcuadrada ajustada es del 99.99% es un valor muy alto . el modelo 3 sigue muy de cerca con una Rcuadrada ajustada de 99.53.
para concluir se escoje el modelo 2 es el mas adecuado.