Trees

Wilson Sandoval

10/11/2020

library(dplyr)
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'pillar'
library(DT)
# Load the `trees` data set, show it as an interactive table, and print
# the per-column summary statistics.
data(trees)
DT::datatable(trees)
summary(trees)
##      Girth           Height       Volume     
##  Min.   : 8.30   Min.   :63   Min.   :10.20  
##  1st Qu.:11.05   1st Qu.:72   1st Qu.:19.40  
##  Median :12.90   Median :76   Median :24.20  
##  Mean   :13.25   Mean   :76   Mean   :30.17  
##  3rd Qu.:15.25   3rd Qu.:80   3rd Qu.:37.30  
##  Max.   :20.60   Max.   :87   Max.   :77.00
# Scatterplot matrix of all columns (base graphics).
plot(trees)

# Pairwise plot matrix from GGally.
library(GGally)
ggpairs(data = trees)

# One boxplot per column, drawn in the order Girth, Height, Volume.
invisible(lapply(trees[c("Girth", "Height", "Volume")], boxplot))

Objetivo: expresar el volumen (Volume) en términos del diámetro (Girth) y la altura (Height).

Construcción del modelo

# Multiple linear regression of Volume on Girth and Height.
modelo_arbol <- lm(formula = Volume ~ Girth + Height, data = trees)

# Coefficient table, residual standard error, R-squared and overall F test.
summary(modelo_arbol)
## 
## Call:
## lm(formula = Volume ~ Girth + Height, data = trees)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.4065 -2.6493 -0.2876  2.2003  8.4847 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -57.9877     8.6382  -6.713 2.75e-07 ***
## Girth         4.7082     0.2643  17.816  < 2e-16 ***
## Height        0.3393     0.1302   2.607   0.0145 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.882 on 28 degrees of freedom
## Multiple R-squared:  0.948,  Adjusted R-squared:  0.9442 
## F-statistic:   255 on 2 and 28 DF,  p-value: < 2.2e-16
# Number of observations in the data set.
dim(trees)[1L]
## [1] 31

# Components stored in the fitted lm object.
names(modelo_arbol)
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"
# Residual degrees of freedom of the fit.
df.residual(modelo_arbol)
## [1] 28

Crear nuevas columnas en la base de datos

# Store the model's fitted volumes next to the observed data.
trees$Volumen_Est <- fitted(modelo_arbol)

# Show the table with the new fitted-value column.
DT::datatable(trees)

# Store the residuals of the fit as another column.
trees$Residuals <- resid(modelo_arbol)

\(Volumen = \beta_0 + \beta_1 \cdot Diametro + \beta_2 \cdot Altura + \epsilon\)

Estimación de la varianza del modelo

# Estimate the error variance of the model: s2 = SCRes / (n - p).
# n and p are derived from the data and the fit instead of being typed in,
# so they stay correct if the data set or the model formula changes.
n <- nrow(trees)         # number of observations (31 here)
p <- modelo_arbol$rank   # estimated coefficients, intercept included (3 here)

# NOTE(review): attach() is discouraged because it puts a copy of the columns
# on the search path. It is kept so that any later unqualified references to
# Volume / Volumen_Est keep working; the statements below do not rely on it.
attach(trees)

# Residual sum of squares: observed minus fitted volume, squared and summed.
SCRes <- sum((trees$Volume - trees$Volumen_Est)^2)
gl_SCRes <- n - p        # residual degrees of freedom
s2 <- SCRes / gl_SCRes   # estimated error variance
s <- sqrt(s2)            # residual standard error

Tabla ANOVA

# Row labels of the ANOVA table (sources of variation).
Fuentes <- c("Regresión", "Residual", "Total")
print(Fuentes)
## [1] "Regresión" "Residual"  "Total"

Suma de Cuadrados Total

# Total sum of squares: squared deviations of Volume from its mean.
SCTotal <- with(trees, sum((Volume - mean(Volume))^2))
print(SCTotal)
## [1] 8106.084
# Total degrees of freedom: one less than the number of observations.
gl_SCTotal <- n - 1
print(gl_SCTotal)
## [1] 30

Suma de Cuadrados de la regresión

# Regression sum of squares: squared deviations of the fitted volumes from
# the mean observed volume. Columns are qualified with trees$ so the
# statement does not depend on attach().
SCReg <- sum((trees$Volumen_Est - mean(trees$Volume))^2)
SCReg
## [1] 7684.163
# Regression degrees of freedom: number of coefficients minus the intercept.
# Derived from p (p - 1 = 2 here) to agree with gl_SCRes = n - p and
# gl_SCTotal = n - 1.
gl_SCReg <- p - 1
gl_SCReg
## [1] 2
# First column of the ANOVA table: source labels (same values as before).
Fuentes <- c("Regresión", "Residual", "Total")

# Second column: sums of squares in the same row order.
Suma_Cuadrados <- c(SCReg, SCRes, SCTotal)
print(Suma_Cuadrados)
## [1] 7684.1625  421.9214 8106.0839

Tercera columna: grados de libertad

# Degrees of freedom in row order: regression, residual, total.
gl <- c(gl_SCReg, gl_SCRes, gl_SCTotal)

Cuarta columna: cuadrados medios

# Mean squares: each sum of squares divided by its degrees of freedom.
Cuadrados_Medios <- c(
  SCReg / gl_SCReg,  # regression mean square
  SCRes / gl_SCRes,  # residual mean square
  NA                 # left empty for the Total row
)

Quinta columna: estadístico F

# F statistic: regression mean square over residual mean square.
# Only the Regression row carries a value; the other rows are NA.
F0 <- c(Cuadrados_Medios[1] / Cuadrados_Medios[2], NA, NA)
print(F0)
## [1] 254.9723       NA       NA
# Cross-check: the F-statistic printed by summary() should agree with the
# hand-computed F0 (254.97, reported as 255 on 2 and 28 DF).
summary(modelo_arbol)
## 
## Call:
## lm(formula = Volume ~ Girth + Height, data = trees)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.4065 -2.6493 -0.2876  2.2003  8.4847 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -57.9877     8.6382  -6.713 2.75e-07 ***
## Girth         4.7082     0.2643  17.816  < 2e-16 ***
## Height        0.3393     0.1302   2.607   0.0145 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.882 on 28 degrees of freedom
## Multiple R-squared:  0.948,  Adjusted R-squared:  0.9442 
## F-statistic:   255 on 2 and 28 DF,  p-value: < 2.2e-16
library(tidyverse)
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'hms'
# Assemble the ANOVA table, one row per source of variation.
TABLA_ANOVA <- tibble(
  Fuentes = Fuentes,
  Suma_Cuadrados = Suma_Cuadrados,
  gl = gl,
  Cuadrados_Medios = Cuadrados_Medios,
  F0 = F0
)
print(TABLA_ANOVA)
## # A tibble: 3 × 5
##   Fuentes   Suma_Cuadrados    gl Cuadrados_Medios    F0
##   <chr>              <dbl> <dbl>            <dbl> <dbl>
## 1 Regresión          7684.     2           3842.   255.
## 2 Residual            422.    28             15.1   NA 
## 3 Total              8106.    30             NA     NA
# Printed again for comparison with the ANOVA table: the residual standard
# error (3.882) is the square root of the residual mean square (15.1), and
# the degrees of freedom (2 and 28) match the gl column.
summary(modelo_arbol)
## 
## Call:
## lm(formula = Volume ~ Girth + Height, data = trees)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.4065 -2.6493 -0.2876  2.2003  8.4847 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -57.9877     8.6382  -6.713 2.75e-07 ***
## Girth         4.7082     0.2643  17.816  < 2e-16 ***
## Height        0.3393     0.1302   2.607   0.0145 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.882 on 28 degrees of freedom
## Multiple R-squared:  0.948,  Adjusted R-squared:  0.9442 
## F-statistic:   255 on 2 and 28 DF,  p-value: < 2.2e-16

Intervalos de confianza para los parámetros del modelo

nivel de confianza: 95% por defecto

# Confidence intervals for the coefficients at confint()'s default level
# (95%, i.e. the 2.5 % and 97.5 % limits).
confint(modelo_arbol)
##                    2.5 %      97.5 %
## (Intercept) -75.68226247 -40.2930554
## Girth         4.16683899   5.2494820
## Height        0.07264863   0.6058538
# Same intervals at 99% confidence; they are wider, and the interval for
# Height now contains 0.
confint(modelo_arbol, level=0.99)
##                    0.5 %      99.5 %
## (Intercept) -81.85734413 -34.1179737
## Girth         3.97792803   5.4383930
## Height       -0.02039064   0.6988931

Intervalos de confianza para la respuesta media

# Observed range of each predictor. The point evaluated afterwards
# (Girth = 10, Height = 70) falls inside both ranges.
range(trees$Girth)
## [1]  8.3 20.6
range(trees$Height)
## [1] 63 87
# 99% confidence interval for the mean Volume at Girth = 10, Height = 70.
# The original call also passed `data = trees`; predict.lm() has no such
# argument, so it was silently absorbed by `...` and had no effect. It is
# removed, and `newdata` is named explicitly.
y_pred <- predict(
  modelo_arbol,
  newdata = data.frame(Girth = 10, Height = 70),
  interval = "confidence",
  level = 0.99
)

# Columns: fit (point estimate), lwr / upr (interval limits).
y_pred
##        fit      lwr      upr
## 1 12.84153 9.895575 15.78749