Librerías necesarias
library(reshape)
## Warning: package 'reshape' was built under R version 4.4.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:reshape':
##
## rename
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 4.4.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:reshape':
##
## rename
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moments)
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(viridisLite)
1. Simule los siguientes procesos:
Inciso a. $y_i = 5 + 3x_i + u_i$, donde $u_i \sim N(\mu = 0, \sigma = 5)$, $x_i$ posee una distribución unitaria con soporte positivo de su elección y $\mathrm{cov}(u_i, x_i) = 0$.
# Fix the RNG seed so the simulated sample is reproducible
set.seed(seed = 223)
n <- 4532 # sample size (last digits of the student ID)
x <- runif(n, min = 0, max = 1) # regressor: uniform on [0, 1] (non-negative)
u <- rnorm(n, mean = 0, sd = 5) # error term: normal, mean 0, sd 5
y <- 5 + 3 * x + u # dependent variable
# Sample covariance between the error and the regressor (expected near 0)
cov(u, x)
## [1] -0.01708463
Pasamos a ajustar la regresión y a revisar tanto el intercepto como los demás estimadores
# Ordinary least squares fit of y on x (intercept included by default)
modelo <- lm(formula = y ~ x)
# Coefficient table, residual summary and goodness-of-fit statistics
summary(modelo)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.1547 -3.4358 0.1112 3.3718 15.9968
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.0761 0.1510 33.62 <2e-16 ***
## x 2.7871 0.2615 10.66 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.988 on 4530 degrees of freedom
## Multiple R-squared: 0.02446, Adjusted R-squared: 0.02424
## F-statistic: 113.6 on 1 and 4530 DF, p-value: < 2.2e-16
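El intercepto estimado (5.08) es cercano al valor verdadero de 5 (el valor esperado de y cuando x = 0). Sin embargo, para la pendiente se estimó un valor de 2.79, algo por debajo del valor verdadero de 3, aunque la diferencia es menor a un error estándar (0.26).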
# Diagnostic plots for `modelo` on a 2 x 3 grid (five panels are used).
# The previous graphical parameters are saved and restored at the end so the
# layout change does not leak into later plots.
old_par <- par(
  mfrow = c(2, 3),
  las = 1,
  mar = c(4, 4, 2, 1)
)
res <- residuals(modelo)

# Built-in lm diagnostics: residuals vs fitted, normal Q-Q, scale-location
plot(modelo, las = 1, col = "#817", which = 1)
plot(modelo, las = 1, col = "#929", which = 2)
plot(modelo, las = 1, col = "#948", which = 3)

# Raw residuals against the regressor
plot(x, res,
  col = "#945",
  xlab = "x", ylab = "Residuals",
  main = "x vs Residuals"
)

# Residuals divided by their sample standard deviation, against the regressor.
# Titled distinctly from the built-in scale-location panel (which = 3), which
# is a different plot.
plot(x, res / sd(res),
  col = "#901", xlab = "x",
  ylab = "Standardized Residuals",
  main = "x vs Standardized Residuals"
)

par(old_par)
Inciso b
# Fix the RNG seed so the simulated sample is reproducible
set.seed(231)
n1 <- 5555
x1 <- runif(n1, 0, 1) # regressor: uniform on [0, 1] (non-negative support)

# Error variance, defined piecewise over three consecutive thirds of the sample
idx_first <- seq_len(floor(n1 / 3))
idx_middle <- seq(floor(n1 / 3) + 1, floor(2 * n1 / 3))
idx_last <- seq(floor(2 * n1 / 3) + 1, n1)

sigma <- numeric(n1)
# First third: variance increases linearly with the regressor.
# The earlier loop wrote sigma[1] on every iteration, leaving the rest of this
# segment at 0 (i.e. errors with zero variance).
sigma[idx_first] <- 2 + 4 * x1[idx_first]
# Middle third: constant variance
sigma[idx_middle] <- 0.5
# Last third: each variance depends on the previous one, so this recursion
# must run sequentially
for (i in idx_last) {
  sigma[i] <- 0.9 * sigma[i - 1] + 2
}

# Standard deviation (sigma holds variances)
sd_i <- sqrt(sigma)

# Heteroskedastic errors and dependent variable
u1 <- rnorm(n1, mean = 0, sd = sd_i)
y1 <- 5 + 3 * x1 + u1

# Sample covariance between the error and the regressor (expected near 0)
# NOTE(review): any printed results for this chunk and for models fitted on
# y1 must be regenerated after this fix.
cov(u1, x1)
## [1] 0.01028037
# Ordinary least squares fit of y1 on x1 (intercept included by default)
modelo1 <- lm(formula = y1 ~ x1)
# Coefficient table, residual summary and goodness-of-fit statistics
summary(modelo1)
##
## Call:
## lm(formula = y1 ~ x1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.9066 -0.4203 0.0401 0.4193 14.5516
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.89659 0.06957 70.38 <2e-16 ***
## x1 3.12205 0.11950 26.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.585 on 5553 degrees of freedom
## Multiple R-squared: 0.1095, Adjusted R-squared: 0.1093
## F-statistic: 682.6 on 1 and 5553 DF, p-value: < 2.2e-16
Inciso c
# Fix the RNG seed so the draws that follow are reproducible
set.seed(seed = 203)
Inciso a
Inciso a
Inciso a
Inciso a
Inciso a
Inciso a