# =========================================
# PRE EXAMEN 2 - VERSION FINAL
# =========================================

# =====================
# 1. LIBRERÍAS
# =====================


library(readxl)
## Warning: package 'readxl' was built under R version 4.5.2
ventas_empresa <- read_excel(file.choose())

str(ventas_empresa)
## tibble [24 × 4] (S3: tbl_df/tbl/data.frame)
##  $ V: num [1:24] 607 590 543 558 571 615 606 593 582 646 ...
##  $ C: num [1:24] 197 208 181 194 192 196 203 200 198 221 ...
##  $ P: num [1:24] 173 152 150 150 163 179 169 166 159 206 ...
##  $ M: num [1:24] 110 107 99 102 109 114 113 113 115 119 ...
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.5.3
## Cargando paquete requerido: zoo
## Warning: package 'zoo' was built under R version 4.5.2
## 
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(car)
## Warning: package 'car' was built under R version 4.5.3
## Cargando paquete requerido: carData
## Warning: package 'carData' was built under R version 4.5.2
library(fastGraph)
## Warning: package 'fastGraph' was built under R version 4.5.3
library(stargazer)
## Warning: package 'stargazer' was built under R version 4.5.2
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
options(scipen = 9999999)
## Warning in options(scipen = 9999999): invalid 'scipen' 9999999, used 9999
# =====================
# 2. VERIFICAR DATOS
# =====================

# Asegúrate que tu dataset se llama ventas_empresa
str(ventas_empresa)
## tibble [24 × 4] (S3: tbl_df/tbl/data.frame)
##  $ V: num [1:24] 607 590 543 558 571 615 606 593 582 646 ...
##  $ C: num [1:24] 197 208 181 194 192 196 203 200 198 221 ...
##  $ P: num [1:24] 173 152 150 150 163 179 169 166 159 206 ...
##  $ M: num [1:24] 110 107 99 102 109 114 113 113 115 119 ...
head(ventas_empresa)
## # A tibble: 6 × 4
##       V     C     P     M
##   <dbl> <dbl> <dbl> <dbl>
## 1   607   197   173   110
## 2   590   208   152   107
## 3   543   181   150    99
## 4   558   194   150   102
## 5   571   192   163   109
## 6   615   196   179   114
# =====================
# 3. MODELO
# =====================
library(readxl)
library(readxl)
ventas_empresa <- read_excel(file.choose())

str(ventas_empresa)
## tibble [24 × 4] (S3: tbl_df/tbl/data.frame)
##  $ V: num [1:24] 607 590 543 558 571 615 606 593 582 646 ...
##  $ C: num [1:24] 197 208 181 194 192 196 203 200 198 221 ...
##  $ P: num [1:24] 173 152 150 150 163 179 169 166 159 206 ...
##  $ M: num [1:24] 110 107 99 102 109 114 113 113 115 119 ...
modelo <- lm(V ~ C + P + M, data = ventas_empresa)

summary(modelo)
## 
## Call:
## lm(formula = V ~ C + P + M, data = ventas_empresa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.279  -6.966   1.555   6.721  14.719 
## 
## Coefficients:
##             Estimate Std. Error t value   Pr(>|t|)    
## (Intercept) 107.4435    18.0575   5.950 0.00000808 ***
## C             0.9226     0.2227   4.142   0.000505 ***
## P             0.9502     0.1558   6.097 0.00000586 ***
## M             1.2978     0.4307   3.013   0.006872 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.506 on 20 degrees of freedom
## Multiple R-squared:  0.9798, Adjusted R-squared:  0.9768 
## F-statistic: 323.6 on 3 and 20 DF,  p-value: < 0.00000000000000022
stargazer(modelo,
          type="html",
          title="Modelo de Ventas",
          digits=4)
## 
## <table style="text-align:center"><caption><strong>Modelo de Ventas</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td>V</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">C</td><td>0.9226<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.2227)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td style="text-align:left">P</td><td>0.9502<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.1558)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td style="text-align:left">M</td><td>1.2978<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.4307)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td style="text-align:left">Constant</td><td>107.4435<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(18.0575)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>24</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.9798</td></tr>
## <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.9768</td></tr>
## <tr><td style="text-align:left">Residual Std. Error</td><td>9.5056 (df = 20)</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>323.6415<sup>***</sup> (df = 3; 20)</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
# =====================
# 4. NORMALIDAD
# =====================

# Jarque-Bera
jb <- jarque.bera.test(residuals(modelo))
print(jb)
## 
##  Jarque Bera Test
## 
## data:  residuals(modelo)
## X-squared = 1.4004, df = 2, p-value = 0.4965
# Shapiro-Wilk
sw <- shapiro.test(residuals(modelo))
print(sw)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(modelo)
## W = 0.95315, p-value = 0.3166
# Gráficos
hist(residuals(modelo), main="Histograma de residuos")

qqnorm(residuals(modelo))
qqline(residuals(modelo))

# =====================
# 5. MULTICOLINEALIDAD
# =====================

vif(modelo)
##        C        P        M 
## 7.631451 3.838911 9.449210
cor(ventas_empresa)
##           V         C         P         M
## V 1.0000000 0.9488347 0.9301324 0.9582886
## C 0.9488347 1.0000000 0.8204521 0.9312400
## P 0.9301324 0.8204521 1.0000000 0.8579160
## M 0.9582886 0.9312400 0.8579160 1.0000000
pairs(ventas_empresa)

# =====================
# 6. HETEROCEDASTICIDAD (WHITE)
# =====================

white <- bptest(modelo,
                ~ C + P + M +
                  I(C^2) + I(P^2) + I(M^2) +
                  I(C*P) + I(C*M) + I(P*M),
                data = ventas_empresa)

print(white)
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo
## BP = 7.1227, df = 9, p-value = 0.6244
# Gráfico
plot(fitted(modelo), residuals(modelo),
     main="Residuos vs Ajustados",
     xlab="Valores ajustados",
     ylab="Residuos")

abline(h=0)

# =====================
# 7. AUTOCORRELACIÓN
# =====================

# BG orden 1
bg1 <- bgtest(modelo, order=1)
print(bg1)
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  modelo
## LM test = 2.5963, df = 1, p-value = 0.1071
# BG orden 2
bg2 <- bgtest(modelo, order=2)
print(bg2)
## 
##  Breusch-Godfrey test for serial correlation of order up to 2
## 
## data:  modelo
## LM test = 3.8409, df = 2, p-value = 0.1465
# Gráficos
shadeDist(bg1$statistic,
          ddist="dchisq",
          parm1=qchisq(0.95, df=1),
          lower.tail=FALSE,
          main="BG Orden 1")

shadeDist(bg2$statistic,
          ddist="dchisq",
          parm1=qchisq(0.95, df=2),
          lower.tail=FALSE,
          main="BG Orden 2")

# =====================
# 8. EXTRA
# =====================

dwtest(modelo)
## 
##  Durbin-Watson test
## 
## data:  modelo
## DW = 1.2996, p-value = 0.02537
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(modelo)
## 
##  Durbin-Watson test
## 
## data:  modelo
## DW = 1.2996, p-value = 0.02537
## alternative hypothesis: true autocorrelation is greater than 0