Importar datos 1
# Load the GPA_Y_CRECIMIENTO dataset from GitHub and preview its first rows.
url1 <- "https://raw.githubusercontent.com/armandovl/datasets_uno/main/GPA_Y_CRECIMIENTO.csv"
# Pass the URL string directly: read.csv() then opens and closes its own
# connection, so no explicit url() connection object is left behind for the
# garbage collector to close with a "closing unused connection" warning.
datos1 <- read.csv(url1)
head(datos1)
Modelado 1
# Simple linear regression of Tasa_Crecimiento on Gasto_Publico, followed by
# its coefficient table and fit statistics.
modelo1 <- lm(Tasa_Crecimiento ~ Gasto_Publico, data = datos1)
summary(modelo1)
Call:
lm(formula = Tasa_Crecimiento ~ Gasto_Publico, data = datos1)
Residuals:
Min 1Q Median 3Q Max
-1.2914 -0.6642 -0.0423 0.2239 2.0047
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.73651 0.21572 3.414 0.00113 **
Gasto_Publico 0.31240 0.02834 11.023 3.03e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9093 on 62 degrees of freedom
Multiple R-squared: 0.6621, Adjusted R-squared: 0.6567
F-statistic: 121.5 on 1 and 62 DF, p-value: 3.026e-16
Supuesto linealidad 1 (la línea roja debe acercarse a la horizontal)
# First plot.lm diagnostic panel (residuals vs fitted values).
plot(modelo1, which = 1)
# Scatter plot of the response against the regressor.
plot(
  Tasa_Crecimiento ~ Gasto_Publico,
  data = datos1,
  pch = 18,
  col = "purple"
)
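Supuesto: media del error = 0 (creo que también sirve para evaluar la linealidad). Nota: en una regresión por MCO con intercepto, la media de los residuos es cero por construcción, por lo que este valor por sí solo no valida el supuesto.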
# Extract the residuals stored in the fitted model and print their mean.
residuos <- modelo1[["residuals"]]
mean(residuos)
[1] -2.072351e-17
Supuesto de normalidad de los errores: en este caso no hay normalidad (ambas pruebas rechazan H0)
# Shapiro-Wilk normality test applied to the model residuals.
shapiro.test(residuos)
Shapiro-Wilk normality test
data: residuos
W = 0.92014, p-value = 0.0005036
# Anderson-Darling normality test on the same residuals (from nortest).
library(nortest)
ad.test(residuos)
Anderson-Darling normality test
data: residuos
A = 1.6519, p-value = 0.0002744
# Histogram of the residuals, requesting 30 breaks.
hist(residuos, breaks = 30)
Supuesto de homogeneidad de las varianzas (homocedasticidad)
Sí son homogéneas: la prueba de Breusch-Pagan no rechaza H0 (p = 0.8609).
# Breusch-Pagan test for heteroscedasticity (H0: constant error variance).
# NOTE(review): the previous comment here claimed H0 is rejected, but the
# rendered output of this call reports p-value = 0.8609, so H0 is NOT rejected
# and the homoscedasticity assumption is not contradicted.
library(lmtest)
bptest(modelo1)
studentized Breusch-Pagan test
data: modelo1
BP = 0.030721, df = 1, p-value = 0.8609
# Plot the residuals against the model's fitted values.
predichos <- fitted(modelo1)
plot(residuos ~ predichos)
Supuesto de independencia entre el error y X (los residuos no deben mostrar ningún patrón frente a Gasto_Publico)
# Residuals plotted against the regressor values.
plot(datos1$Gasto_Publico, residuos)
# Load the cobb_douglas dataset from GitHub and preview its first 8 rows.
url2 <- "https://raw.githubusercontent.com/armandovl/datasets_uno/main/cobb_douglas.csv"
# Pass the URL string directly so read.csv() manages (opens and closes) the
# connection itself instead of leaving an explicit url() object dangling.
datos2 <- read.csv(url2)
head(datos2, 8)
NA
# Multiple regression in levels: produccion on capital, superficie and
# trabajadores.
regresion1 <- lm(
  produccion ~ capital + superficie + trabajadores,
  data = datos2
)
summary(regresion1)
Call:
lm(formula = produccion ~ capital + superficie + trabajadores,
data = datos2)
Residuals:
Min 1Q Median 3Q Max
-1779.87 -1104.16 -64.34 961.48 1916.98
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -7787.0699 6576.6682 -1.184 0.26135
capital 10324.7667 2846.5727 3.627 0.00398 **
superficie 1.2729 0.4615 2.758 0.01862 *
trabajadores 1.9611 0.8163 2.402 0.03509 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1362 on 11 degrees of freedom
Multiple R-squared: 0.9574, Adjusted R-squared: 0.9458
F-statistic: 82.39 on 3 and 11 DF, p-value: 8.033e-08
# Same specification in log-log form, so each slope is an elasticity.
regresion2 <- lm(
  log(produccion) ~ log(capital) + log(superficie) + log(trabajadores),
  data = datos2
)
summary(regresion2)
Call:
lm(formula = log(produccion) ~ log(capital) + log(superficie) +
log(trabajadores), data = datos2)
Residuals:
Min 1Q Median 3Q Max
-0.046143 -0.027828 0.001433 0.022655 0.043295
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.9408 2.3406 1.256 0.23500
log(capital) 0.3528 0.1028 3.431 0.00561 **
log(superficie) 0.5187 0.1944 2.668 0.02187 *
log(trabajadores) 0.2870 0.1607 1.786 0.10162
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.03512 on 11 degrees of freedom
Multiple R-squared: 0.9548, Adjusted R-squared: 0.9425
F-statistic: 77.51 on 3 and 11 DF, p-value: 1.106e-07
# Durbin-Watson test for first-order autocorrelation in the residuals.
dwtest(regresion2) # no autocorrelation (the rendered output shows p-value = 0.2951)
Durbin-Watson test
data: regresion2
DW = 1.9354, p-value = 0.2951
alternative hypothesis: true autocorrelation is greater than 0
# Load the regiones dataset from GitHub and preview its first 8 rows.
url3 <- "https://raw.githubusercontent.com/armandovl/datasets_uno/main/regiones.csv"
# Pass the URL string directly: the rendered output for this chunk shows a
# "closing unused connection" warning for this very URL, which comes from the
# explicit url() connection object. read.csv() opens and closes its own
# connection when given the string.
datos3 <- read.csv(url3)
head(datos3, 8)
datos3$Region <- as.factor(datos3$Region) # convert to factor
# Regress Gasto on the Region factor (entered as dummies) plus Salario.
regresion3 <- lm(Gasto ~ Region + Salario, data = datos3)
Warning in for (i in seq_len(n)) { :
cerrando la conenexion 3 (https://raw.githubusercontent.com/armandovl/datasets_uno/main/regiones.csv) que no esta siendo utilizada
# Coefficient table and fit statistics for the Region + Salario model.
summary(regresion3)
Call:
lm(formula = Gasto ~ Region + Salario, data = datos3)
Residuals:
Min 1Q Median 3Q Max
-1332.78 -318.78 -8.37 323.14 1295.64
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.262e+03 5.142e+02 -2.455 0.0179 *
RegionOeste -3.481e+02 2.062e+02 -1.688 0.0980 .
RegionSur -3.028e+02 1.904e+02 -1.591 0.1184
Salario 2.114e-01 2.042e-02 10.354 1.03e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 575.5 on 47 degrees of freedom
Multiple R-squared: 0.7201, Adjusted R-squared: 0.7023
F-statistic: 40.31 on 3 and 47 DF, p-value: 4.791e-13
# Gasto vs Salario, one facet row per Region, with a per-region linear fit
# drawn without a confidence band and extended across the full x range.
ggplot(
  datos3,
  aes(x = Salario, y = Gasto, color = Region, shape = Region)
) +
  facet_grid(Region ~ .) +
  geom_point(size = 2) +
  geom_smooth(method = lm, se = FALSE, fullrange = TRUE)
# Load the ingreso dataset from GitHub and preview its first 8 rows.
url4 <- "https://raw.githubusercontent.com/armandovl/datasets_uno/main/ingreso.csv"
# Pass the URL string directly so read.csv() manages (opens and closes) the
# connection itself instead of leaving an explicit url() object dangling.
datos4 <- read.csv(url4)
head(datos4, 8)
# Regression of ingreso on genero and exp.
regresion4 <- lm(ingreso ~ genero + exp, data = datos4)
summary(regresion4) # model statistics
Call:
lm(formula = ingreso ~ genero + exp, data = datos4)
Residuals:
Min 1Q Median 3Q Max
-3629.6 -890.4 -273.9 1163.2 4166.6
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 11871.73 836.95 14.185 7.49e-11 ***
generomujer -4580.74 928.26 -4.935 0.000126 ***
exp 329.24 53.33 6.173 1.02e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2011 on 17 degrees of freedom
Multiple R-squared: 0.7481, Adjusted R-squared: 0.7185
F-statistic: 25.24 on 2 and 17 DF, p-value: 8.135e-06
# ingreso vs exp, coloured and shaped by genero, with one linear fit per group
# drawn without a confidence band and extended across the full x range.
ggplot(
  datos4,
  aes(x = exp, y = ingreso, color = genero, shape = genero)
) +
  geom_point(size = 2) +
  geom_smooth(method = lm, se = FALSE, fullrange = TRUE)
Warning: cerrando la conenexion 3 (https://raw.githubusercontent.com/armandovl/datasets_uno/main/ingresos.csv) que no esta siendo utilizada
# Log-log variant of the income model: log(ingreso) on genero and log(exp).
regresion45 <- lm(log(ingreso) ~ genero + log(exp), data = datos4)
summary(regresion45) # model statistics
Call:
lm(formula = log(ingreso) ~ genero + log(exp), data = datos4)
Residuals:
Min 1Q Median 3Q Max
-0.35093 -0.07041 0.00149 0.09529 0.29485
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 9.08757 0.11287 80.516 < 2e-16 ***
generomujer -0.38709 0.07956 -4.866 0.000145 ***
log(exp) 0.26217 0.05078 5.162 7.81e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.1658 on 17 degrees of freedom
Multiple R-squared: 0.685, Adjusted R-squared: 0.6479
F-statistic: 18.48 on 2 and 17 DF, p-value: 5.441e-05
# Load the cobb2 dataset from GitHub. The URL string is passed directly so
# read.csv() opens and closes its own connection, rather than leaving an
# explicit url() connection object dangling.
datos5 <- read.csv(
  "https://raw.githubusercontent.com/armandovl/datasets_uno/main/cobb2.csv"
)
head(datos5, 7) # data structure: first 7 records
summary(datos5) # per-column summary statistics
anio xt wt ct
Min. :1999 Min. :179.2 Min. :126.4 Min. :1141
1st Qu.:2003 1st Qu.:185.6 1st Qu.:139.4 1st Qu.:1538
Median :2006 Median :241.2 Median :143.9 Median :2246
Mean :2006 Mean :262.6 Mean :149.1 Mean :2168
3rd Qu.:2010 3rd Qu.:320.9 3rd Qu.:152.8 3rd Qu.:2735
Max. :2014 Max. :424.7 Max. :193.5 Max. :3082
# Cobb-Douglas regression in logs: log(xt) on log(wt) and log(ct).
# NOTE(review): this reuses the name regresion2, overwriting the earlier
# log-log model fitted on datos2 -- confirm that is intended.
regresion2 <- lm(log(xt) ~ log(wt) + log(ct), data = datos5)
summary(regresion2) # regression statistics
Call:
lm(formula = log(xt) ~ log(wt) + log(ct), data = datos5)
Residuals:
Min 1Q Median 3Q Max
-0.16168 -0.04704 -0.01696 0.03868 0.16616
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -7.2869 2.2966 -3.173 0.00734 **
log(wt) 0.9402 0.3144 2.991 0.01042 *
log(ct) 1.0637 0.1119 9.506 3.23e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.09381 on 13 degrees of freedom
Multiple R-squared: 0.9139, Adjusted R-squared: 0.9006
F-statistic: 68.95 on 2 and 13 DF, p-value: 1.2e-07
# Durbin-Watson test for first-order autocorrelation in the residuals.
dwtest(regresion2) # autocorrelation is present (the rendered output shows p-value = 0.0001125)
Durbin-Watson test
data: regresion2
DW = 0.73868, p-value = 0.0001125
alternative hypothesis: true autocorrelation is greater than 0
# Pairwise correlation matrix across every column of datos5.
cor(datos5) # author's reading: multicollinearity is present
anio xt wt ct
anio 1.0000000 0.9362228 -0.7339226 0.9930411
xt 0.9362228 1.0000000 -0.5069428 0.9234758
wt -0.7339226 -0.5069428 1.0000000 -0.7120275
ct 0.9930411 0.9234758 -0.7120275 1.0000000
# Load the CAP2_MCO_0 dataset from GitHub and preview its first rows.
# NOTE(review): url2 and datos2 are reused here, replacing the cobb_douglas
# objects created earlier in the file.
url2 <- "https://raw.githubusercontent.com/armandovl/datasets_uno/main/CAP2_MCO_0.csv"
# Pass the URL string directly so read.csv() manages (opens and closes) the
# connection itself instead of leaving an explicit url() object dangling.
datos2 <- read.csv(url2)
head(datos2)
# Regression of y on x2 and x3.
modelo2 <- lm(y ~ x2 + x3, data = datos2)
summary(modelo2)
Call:
lm(formula = y ~ x2 + x3, data = datos2)
Residuals:
Min 1Q Median 3Q Max
-0.20360 -0.08483 0.01550 0.06820 0.28696
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.381549 0.131886 10.475 2.75e-13 ***
x2 0.022279 0.001305 17.075 < 2e-16 ***
x3 -0.003898 0.007560 -0.516 0.609
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.1016 on 42 degrees of freedom
Multiple R-squared: 0.9752, Adjusted R-squared: 0.974
F-statistic: 825.3 on 2 and 42 DF, p-value: < 2.2e-16
# Draw the default set of lm diagnostic plots for modelo2.
plot(modelo2)
# Reload the CAP2_MCO_0 dataset into datos2.
# NOTE(review): this is the same file already read via url2 above, and url3
# previously held the regiones.csv address -- confirm the reload is intended.
url3 <- "https://raw.githubusercontent.com/armandovl/datasets_uno/main/CAP2_MCO_0.csv"
# Pass the URL string directly so read.csv() manages (opens and closes) the
# connection itself instead of leaving an explicit url() object dangling.
datos2 <- read.csv(url3)