Tabla 3.3 - Transformación de variables e interpretación de β1

Tabla 3.3 - Transformación de variables e interpretación de β1

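| Modelo | Regresión | Variable dependiente | Variable independiente | Interpretación de $\beta_1$ |
|---|---|---|---|---|
| Lin-Lin | $Y_i = \beta_0 + \beta_1 X_i + \varepsilon_i$ | $Y$ | $X$ | $\Delta Y = \beta_1 \Delta X$ |
| Lin-Log | $Y_i = \beta_0 + \beta_1 \log(X_i) + \varepsilon_i$ | $Y$ | $\log(X)$ | $\Delta Y = (\beta_1/100)\,\%\Delta X$ |
| Log-Lin | $\log(Y_i) = \beta_0 + \beta_1 X_i + \varepsilon_i$ | $\log(Y)$ | $X$ | $\%\Delta Y = (100\,\beta_1)\,\Delta X$ |
| Log-Log | $\log(Y_i) = \beta_0 + \beta_1 \log(X_i) + \varepsilon_i$ | $\log(Y)$ | $\log(X)$ | $\%\Delta Y = \beta_1\,\%\Delta X$ |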
library(paqueteMETODOS)
## Cargando paquete requerido: dplyr
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Cargando paquete requerido: ggplot2
library(nortest)
# Load the biomasa dataset and preview its first six rows.
data("biomasa")
head(biomasa, n = 6L)
## # A tibble: 6 × 8
##   finca   mg         bio_aerea bio_sub bio_total area_foliar diametro altura
##   <chr>   <chr>          <dbl>   <dbl>     <dbl>       <dbl>    <dbl>  <dbl>
## 1 FINCA_1 GENOTIPO_1     12.8     0.93     13.7         44.5      4.7    5  
## 2 FINCA_1 GENOTIPO_1     13.9     0.69     14.6         39.7      5.3    5.6
## 3 FINCA_1 GENOTIPO_1     15.1     0.78     15.9         45.6      4.8    5.8
## 4 FINCA_1 GENOTIPO_1      8.08    0.91      8.99        29.5      3.2    4.3
## 5 FINCA_1 GENOTIPO_1      5.58    1.41      6.99        22.5      2.2    3.3
## 6 FINCA_1 GENOTIPO_2     18.5     0.84     19.3         34.2      6.3    7.9
# Fit bio_total on diametro under the four level/log combinations.
modelo1 <- lm(bio_total ~ diametro, data = biomasa)            # Lin-Lin
modelo2 <- lm(bio_total ~ log(diametro), data = biomasa)       # Lin-Log
modelo3 <- lm(log(bio_total) ~ diametro, data = biomasa)       # Log-Lin
modelo4 <- lm(log(bio_total) ~ log(diametro), data = biomasa)  # Log-Log
# Residual diagnostics for modelo1 (Lin-Lin).
# NOTE(review): the Durbin-Watson statistic depends on the row order of the
# data; confirm that the ordering of biomasa is meaningful for this test.
u <- modelo1[["residuals"]]
shapiro.test(u) # Normality
## 
##  Shapiro-Wilk normality test
## 
## data:  u
## W = 0.95356, p-value = 0.002793
lmtest::bptest(modelo1) # Homoscedasticity
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo1
## BP = 11.76, df = 1, p-value = 0.0006052
lmtest::dwtest(modelo1) # No autocorrelation
## 
##  Durbin-Watson test
## 
## data:  modelo1
## DW = 1.0719, p-value = 1.035e-06
## alternative hypothesis: true autocorrelation is greater than 0
boxplot(u) # Outliers

# Residual diagnostics for modelo2 (Lin-Log).
u2 <- modelo2[["residuals"]]
shapiro.test(u2) # Normality
## 
##  Shapiro-Wilk normality test
## 
## data:  u2
## W = 0.90336, p-value = 5.741e-06
lmtest::bptest(modelo2) # Homoscedasticity
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo2
## BP = 3.8361, df = 1, p-value = 0.05016
lmtest::dwtest(modelo2) # No autocorrelation
## 
##  Durbin-Watson test
## 
## data:  modelo2
## DW = 1.1852, p-value = 2.018e-05
## alternative hypothesis: true autocorrelation is greater than 0
boxplot(u2) # Outliers

# Residual diagnostics for modelo3 (Log-Lin).
u3 <- modelo3[["residuals"]]
shapiro.test(u3) # Normality
## 
##  Shapiro-Wilk normality test
## 
## data:  u3
## W = 0.98394, p-value = 0.3338
lmtest::bptest(modelo3) # Homoscedasticity
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo3
## BP = 3.879, df = 1, p-value = 0.04889
lmtest::dwtest(modelo3) # No autocorrelation
## 
##  Durbin-Watson test
## 
## data:  modelo3
## DW = 0.67803, p-value = 1.716e-13
## alternative hypothesis: true autocorrelation is greater than 0
boxplot(u3) # Outliers

# Residual diagnostics for modelo4 (Log-Log).
u4 <- modelo4[["residuals"]]
shapiro.test(u4) # Normality
## 
##  Shapiro-Wilk normality test
## 
## data:  u4
## W = 0.98194, p-value = 0.2459
lmtest::bptest(modelo4) # Homoscedasticity
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo4
## BP = 0.79099, df = 1, p-value = 0.3738
lmtest::dwtest(modelo4) # No autocorrelation
## 
##  Durbin-Watson test
## 
## data:  modelo4
## DW = 0.91146, p-value = 6.207e-09
## alternative hypothesis: true autocorrelation is greater than 0
boxplot(u4) # Outliers

# Side-by-side text table comparing the four fitted models; df = FALSE
# leaves the degrees of freedom out of the reported model statistics.
stargazer::stargazer(
  modelo1, modelo2, modelo3, modelo4,
  type = "text",
  df = FALSE
)
## 
## ===============================================================
##                                 Dependent variable:            
##                     -------------------------------------------
##                           bio_total          log(bio_total)    
##                        (1)        (2)        (3)        (4)    
## ---------------------------------------------------------------
## diametro             5.103***              0.278***            
##                      (0.251)               (0.011)             
##                                                                
## log(diametro)                  23.369***              1.344*** 
##                                 (1.564)               (0.058)  
##                                                                
## Constant            -9.020***  -19.909***  1.328***   0.618*** 
##                      (1.413)    (2.629)    (0.060)    (0.098)  
##                                                                
## ---------------------------------------------------------------
## Observations            90         90         90         90    
## R2                    0.825      0.717      0.887      0.858   
## Adjusted R2           0.823      0.714      0.885      0.857   
## Residual Std. Error   3.435      4.362      0.145      0.162   
## F Statistic         413.961*** 223.224*** 687.562*** 532.232***
## ===============================================================
## Note:                               *p<0.1; **p<0.05; ***p<0.01
Tabla de Transformaciones

Tabla de Transformaciones

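| $\lambda$ | -2 | -1 | -0.5 | 0 | 0.5 | 1 | 2 |
|---|---|---|---|---|---|---|---|
| Transformación | $1/y^2$ | $1/y$ | $1/\sqrt{y}$ | $\log(y)$ | $\sqrt{y}$ | $y$ | $y^2$ |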
# Refit the Lin-Lin model and print its coefficient summary.
modelo1 <- lm(bio_total ~ diametro, data = biomasa)
summary(modelo1)
## 
## Call:
## lm(formula = bio_total ~ diametro, data = biomasa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.3775 -2.6594  0.0237  1.8758 11.9876 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -9.0203     1.4129  -6.384 7.86e-09 ***
## diametro      5.1026     0.2508  20.346  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.435 on 88 degrees of freedom
## Multiple R-squared:  0.8247, Adjusted R-squared:  0.8227 
## F-statistic:   414 on 1 and 88 DF,  p-value: < 2.2e-16
# Box-Cox analysis. Plots are laid out in a 1 x 2 panel grid.
par(mfrow = c(1, 2))
library(MASS)
## 
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# Reference Box-Cox profile for a simple fit on the built-in mtcars data.
boxcox(lm(mpg ~ wt, data = mtcars))

# Box-Cox profile for the Lin-Lin biomass model over a wide lambda range.
# The formula uses bare column names resolved through `data`, matching the
# model fits elsewhere in the analysis (no redundant `biomasa$` prefixes).
boxcox(lm(bio_total ~ diametro, data = biomasa), lambda = -3:3)

# Repeat the process, this time narrowing the range of lambda values.
# NOTE(review): -1:1 supplies only three lambda values; when plotting with
# fewer than 100 values boxcox() spline-interpolates the profile, so the
# maximiser below comes from the interpolated curve. Consider a finer grid
# such as seq(-1, 1, by = 0.01) if a more precise estimate is needed.
bc <- boxcox(lm(bio_total ~ diametro, data = biomasa), lambda = -1:1)

# Lambda with the largest profile log-likelihood; the outer parentheses
# print the value as it is assigned.
(lambda <- bc$x[which.max(bc$y)])
## [1] 0.07070707