\(~\)
\(~\)
library(tidyverse)
library(kableExtra)
library(skedastic)
# Generating the data
x_1 <- rnorm(n = 10000, mean = 2, sd = 3)
aux <- rnorm(n = 10000, mean = 0, sd = 1)
# Computing y and u
u <- 2*x_1*aux
y <- 2 + 3*x_1 + u
# Building the dataset
data <- cbind(y, x_1, aux, u)
# Printing the output
head(data) %>%
kbl(booktabs = T) %>%
kable_styling(latex_options = "striped", full_width = T)
| y | x_1 | aux | u |
|---|---|---|---|
| 5.210574 | 0.9039949 | 0.2757700 | 0.4985893 |
| 4.425891 | 1.1728867 | -0.4658461 | -1.0927694 |
| -1.743588 | -3.5699377 | -0.9756788 | 6.9662254 |
| 2.194987 | 4.3637542 | -1.4776583 | -12.8962755 |
| 29.207952 | 5.9519592 | 0.7856299 | 9.3520746 |
| 19.738861 | 4.6820141 | 0.3943622 | 3.6928190 |
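By construction, the error term is heteroskedastic: `aux` is standard normal and independent of `x_1`, so the conditional variance of \(u\) grows with the square of the regressor,

\[
\operatorname{Var}(u \mid x_1) = 4\,x_1^2\,\operatorname{Var}(\text{aux}) = 4\,x_1^2.
\]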
\(~\)
\(~\)
# Building the model
model_1 <- lm(y ~ x_1, data = as.data.frame(data))
# Extracting the residuals
residuals <- residuals(model_1)
# Plotting the residuals
residuals_plot <- data.frame(x_1 = x_1, residuals = residuals) %>%
  ggplot(aes(x = residuals, y = x_1)) +
  geom_point(color = '#7DD181') +
  theme(panel.background =
          element_rect(fill = "white"),
        axis.line = element_line(size = 0.5,
                                 colour = "gray",
                                 linetype = 1)) +
  xlab("Residuals")
# Printing the output
print(residuals_plot)
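The horizontal spread of the residuals widens as \(x_1\) moves away from zero, which is the visual signature of the heteroskedasticity built into the data.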
\(~\)
\(~\)
\(~\)
# Extracting the fitted values and residuals, along with their squares
fitted_y <- fitted(model_1)
fitted_u <- resid(model_1)
fitted_y_2 <- fitted(model_1)^2
fitted_u_2 <- resid(model_1)^2
# Adding to the dataset
data_1 <- cbind(y, x_1, aux, u, fitted_y, fitted_u, fitted_y_2, fitted_u_2)
# Printing the output
head(data_1) %>%
kbl(booktabs = T) %>%
kable_styling(latex_options = "striped", full_width = T)
| y | x_1 | aux | u | fitted_y | fitted_u | fitted_y_2 | fitted_u_2 |
|---|---|---|---|---|---|---|---|
| 5.210574 | 0.9039949 | 0.2757700 | 0.4985893 | 4.786149 | 0.4244255 | 22.90722 | 0.180137 |
| 4.425891 | 1.1728867 | -0.4658461 | -1.0927694 | 5.589722 | -1.1638315 | 31.24499 | 1.354504 |
| -1.743588 | -3.5699377 | -0.9756788 | 6.9662254 | -8.584042 | 6.8404541 | 73.68578 | 46.791813 |
| 2.194987 | 4.3637542 | -1.4776583 | -12.8962755 | 15.125518 | -12.9305304 | 228.78128 | 167.198617 |
| 29.207952 | 5.9519592 | 0.7856299 | 9.3520746 | 19.871812 | 9.3361399 | 394.88892 | 87.163508 |
| 19.738861 | 4.6820141 | 0.3943622 | 3.6928190 | 16.076626 | 3.6622352 | 258.45791 | 13.411967 |
\(~\)
\(~\)
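Model 2 below is the auxiliary regression of the simplified White's test: the squared residuals are regressed on the fitted values and their squares,

\[
\hat{u}^2 = \delta_0 + \delta_1 \hat{y} + \delta_2 \hat{y}^2 + \varepsilon.
\]

Under homoskedasticity, \(\delta_1 = \delta_2 = 0\), so the \(R^2\) of this regression should be close to zero.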
# Computing model 2
model_2 <- lm(fitted_u_2 ~ fitted_y + fitted_y_2, data = as.data.frame(data_1))
# Summary
summary(model_2)
##
## Call:
## lm(formula = fitted_u_2 ~ fitted_y + fitted_y_2, data = as.data.frame(data_1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -720.35 -29.39 -4.07 1.45 2648.44
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.07467 1.66408 1.848 0.0647 .
## fitted_y -2.17598 0.21906 -9.933 <2e-16 ***
## fitted_y_2 0.46508 0.01089 42.692 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 123.7 on 9997 degrees of freedom
## Multiple R-squared: 0.2418, Adjusted R-squared: 0.2417
## F-statistic: 1594 on 2 and 9997 DF, p-value: < 2.2e-16
# Storing the R^2
R_2 <- summary(model_2)$r.squared
\(~\)
What can you conclude?
\(~\)
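Under the null hypothesis of homoskedasticity, the statistic \(n R^2\) from the auxiliary regression is asymptotically chi-squared distributed, with degrees of freedom equal to the number of slope regressors in the auxiliary regression (here 2):

\[
n R^2 \overset{a}{\sim} \chi^2_2.
\]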
# Custom White's test: the statistic is n*R^2
nR_2 <- 10000*R_2
# p-value from the upper tail of the chi-squared distribution with 2 df
p.value <- round(pchisq(nR_2, df = 2, lower.tail = FALSE), digits = 5)
# Results
tibble(nR_2, p.value) %>%
kbl(booktabs = T) %>%
kable_styling(latex_options = "striped", full_width = T)
| nR_2 | p.value |
|---|---|
| 2418.171 | 0 |
\(~\)
As the p-value is below the significance level (alpha), the null hypothesis of homoskedasticity is rejected.
\(~\)
\(~\)
# skedastic function
white_lm(model_1) %>%
kbl(booktabs = T) %>%
kable_styling(latex_options = "striped", full_width = T)
| statistic | p.value | parameter | method | alternative |
|---|---|---|---|---|
| 2418.171 | 0 | 2 | White’s Test | greater |
\(~\)
\(~\)
# Feasible generalized least squares (FGLS) estimation, weighting by the inverse squared OLS residuals
fgls_model <- lm(y ~ x_1, data = as.data.frame(data), weights = 1/model_1$residuals^2)
# output
summary(fgls_model)
##
## Call:
## lm(formula = y ~ x_1, data = as.data.frame(data), weights = 1/model_1$residuals^2)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -1.1078 -1.0002 -0.9997 0.9998 1.0593
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0847526 0.0004988 4179 <2e-16 ***
## x_1 2.9886739 0.0005330 5607 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1 on 9998 degrees of freedom
## Multiple R-squared: 0.9997, Adjusted R-squared: 0.9997
## F-statistic: 3.144e+07 on 1 and 9998 DF, p-value: < 2.2e-16
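Weighting by the inverse of the raw squared residuals is the simplest feasible choice. A common textbook alternative, sketched below under the assumption that the error variance is an exponential function of the regressor (the names `log_u2`, `aux_reg`, `h_hat`, and `fgls_alt` are illustrative, not part of the original analysis), first models the log of the squared residuals:

# Sketch: exponential-variance FGLS (an assumed variant, not the method used above)
log_u2 <- log(resid(model_1)^2)      # log of squared OLS residuals
aux_reg <- lm(log_u2 ~ x_1)          # estimate the variance function
h_hat <- exp(fitted(aux_reg))        # fitted variances, strictly positive
fgls_alt <- lm(y ~ x_1, data = as.data.frame(data), weights = 1/h_hat)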
# var-cov matrix : fgls_model
vcov(fgls_model)
## (Intercept) x_1
## (Intercept) 2.488374e-07 -5.025373e-08
## x_1 -5.025373e-08 2.840740e-07
# var-cov matrix : model_1
vcov(model_1)
## (Intercept) x_1
## (Intercept) 0.007477413 -0.0011379232
## x_1 -0.001137923 0.0005821868
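As a quick consistency check, the precision gain can be read directly from the two matrices by taking the ratio of the OLS standard errors to the FGLS standard errors:

# Ratio of OLS to FGLS standard errors (values above 1 favour FGLS)
sqrt(diag(vcov(model_1))) / sqrt(diag(vcov(fgls_model)))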
\(~\)
The feasible generalized least squares estimation has managed to reduce the standard errors of both coefficient estimates considerably, as the comparison of the two variance-covariance matrices shows.