En esta actividad usaremos 3 diferentes bases de datos.
Tema 1. Datos de panel
1. Hogares: con variables como Año, Miembros, Ingreso, Gasto, Ahorro y Satisfacción.
Tema 2. Series de tiempo
2. Population: con variables como estado, año y población.
Tema 3. Modelos de ecuaciones estructurales
3. Ecosistema: con las siguientes variables,
Calidad del Suelo:
SPH: pH del Suelo
NC: Contenido de Nutrientes
OM: Materia Orgánica
Calidad del Agua:
CL: Niveles de Contaminantes
DO: Oxígeno Disuelto
WPH: pH del Agua
Salud del Ecosistema
SD: Diversidad de Especies
BM: Biomasa
EP: Productividad del Ecosistema
# Paquetes necesarios
# install.packages("WDI")
# install.packages("wbstats")
# install.packages("gplots")
# install.packages("plm")
#install.packages("DataExplorer")
library(WDI)
library(wbstats)
library(dplyr)
library(tidyverse)
library(plm)
library(gplots)
library(readxl)
library(lmtest)
library(forecast)
library(readxl)
library(lavaanPlot)
library(lavaan)
library(DataExplorer)
# Load the three datasets used in this script: the household panel (Excel),
# the population table (CSV) and the ecosystem indicators (CSV).
# NOTE(review): the paths are absolute and specific to one machine; they must
# be adjusted before running the script anywhere else.
df_hogares <- read_excel(
  path = "/Users/luisangeldiazcontreras/Library/CloudStorage/OneDrive-InstitutoTecnologicoydeEstudiosSuperioresdeMonterrey/9th season/M1/hogares.xlsx"
)
df_population <- read_csv(
  file = "/Users/luisangeldiazcontreras/Library/CloudStorage/OneDrive-InstitutoTecnologicoydeEstudiosSuperioresdeMonterrey/9th season/M1/population.csv"
)
df_ecosistemas <- read_csv(
  file = "/Users/luisangeldiazcontreras/Library/CloudStorage/OneDrive-InstitutoTecnologicoydeEstudiosSuperioresdeMonterrey/9th season/M1/ecosistema.csv"
)
# Plot the mean of Satisfacción (the response used by the panel models in this
# script) for each year, with the confidence intervals drawn by plotmeans().
# The response must be the outcome variable: averaging the household
# identifier (HogarID) by year carries no information about heterogeneity.
plotmeans(
  Satisfacción ~ Año,
  main = "Heterogeneidad entre hogares en el tiempo",
  data = df_hogares
)
# Declare the panel structure: HogarID is the individual index and Año the
# time index.
df1 <- pdata.frame(df_hogares, index = c("HogarID", "Año"))

# Pooled OLS baseline: ignores the panel structure and fits one regression on
# all household-year observations.
# NOTE(review): the printed summary reports one coefficient dropped because of
# singularities; Ahorro is the regressor missing from the coefficient table.
pooled <- plm(
  formula = Satisfacción ~ Miembros + Ingreso + Gasto + Ahorro,
  data = df1,
  model = "pooling"
)
summary(pooled)
## Pooling Model
##
## Call:
## plm(formula = Satisfacción ~ Miembros + Ingreso + Gasto + Ahorro,
## data = df1, model = "pooling")
##
## Balanced Panel: n = 100, T = 10, N = 1000
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -3.522293 -0.642726 0.031837 0.700747 3.072450
##
## Coefficients: (1 dropped because of singularities)
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 5.3662e+00 1.7113e-01 31.3567 <2e-16 ***
## Miembros -2.9522e-02 2.8950e-02 -1.0198 0.3081
## Ingreso 8.9451e-05 3.9046e-06 22.9089 <2e-16 ***
## Gasto -9.9934e-05 4.8388e-06 -20.6524 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1888.5
## Residual Sum of Squares: 1015.8
## R-Squared: 0.46209
## Adj. R-Squared: 0.46047
## F-statistic: 285.209 on 3 and 996 DF, p-value: < 2.22e-16
# Fixed-effects ("within") estimator on the same specification as the pooled
# model, so the two fits can be compared directly.
within <- plm(
  formula = Satisfacción ~ Miembros + Ingreso + Gasto + Ahorro,
  data = df1,
  model = "within"
)
summary(within)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = Satisfacción ~ Miembros + Ingreso + Gasto + Ahorro,
## data = df1, model = "within")
##
## Balanced Panel: n = 100, T = 10, N = 1000
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -3.47988 -0.64311 0.06577 0.60962 2.98563
##
## Coefficients: (1 dropped because of singularities)
## Estimate Std. Error t-value Pr(>|t|)
## Ingreso 9.2085e-05 6.7477e-06 13.6469 < 2.2e-16 ***
## Gasto -1.1352e-04 1.1653e-05 -9.7411 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1229.3
## Residual Sum of Squares: 940.52
## R-Squared: 0.23488
## Adj. R-Squared: 0.14882
## F-statistic: 137.836 on 2 and 898 DF, p-value: < 2.22e-16
# F test for individual effects: compares the within (fixed-effects) fit
# against the pooled fit.
pFtest(x = within, z = pooled)
##
## F test for individual effects
##
## data: Satisfacción ~ Miembros + Ingreso + Gasto + Ahorro
## F = 0.73356, df1 = 98, df2 = 898, p-value = 0.9735
## alternative hypothesis: significant effects
# Random-effects estimator using the Wallace-Hussain method for the variance
# components.
walhus <- plm(
  formula = Satisfacción ~ Miembros + Ingreso + Gasto + Ahorro,
  data = df1,
  model = "random",
  random.method = "walhus"
)
summary(walhus)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = Satisfacción ~ Miembros + Ingreso + Gasto + Ahorro,
## data = df1, model = "random", random.method = "walhus")
##
## Balanced Panel: n = 100, T = 10, N = 1000
##
## Effects:
## var std.dev share
## idiosyncratic 1.047 1.023 1
## individual 0.000 0.000 0
## theta: 0
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -3.522293 -0.642726 0.031837 0.700747 3.072450
##
## Coefficients: (1 dropped because of singularities)
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 5.3662e+00 1.7113e-01 31.3567 <2e-16 ***
## Miembros -2.9522e-02 2.8950e-02 -1.0198 0.3078
## Ingreso 8.9451e-05 3.9046e-06 22.9089 <2e-16 ***
## Gasto -9.9934e-05 4.8388e-06 -20.6524 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1888.5
## Residual Sum of Squares: 1015.8
## R-Squared: 0.46209
## Adj. R-Squared: 0.46047
## Chisq: 855.626 on 3 DF, p-value: < 2.22e-16
# Random-effects estimator using the Amemiya method for the variance
# components; same specification as the other panel models.
amemiya <- plm(
  formula = Satisfacción ~ Miembros + Ingreso + Gasto + Ahorro,
  data = df1,
  model = "random",
  random.method = "amemiya"
)
summary(amemiya)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = Satisfacción ~ Miembros + Ingreso + Gasto + Ahorro,
## data = df1, model = "random", random.method = "amemiya")
##
## Balanced Panel: n = 100, T = 10, N = 1000
##
## Effects:
## var std.dev share
## idiosyncratic 1.045 1.022 1
## individual 0.000 0.000 0
## theta: 0
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -3.522293 -0.642726 0.031837 0.700747 3.072450
##
## Coefficients: (1 dropped because of singularities)
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 5.3662e+00 1.7113e-01 31.3567 <2e-16 ***
## Miembros -2.9522e-02 2.8950e-02 -1.0198 0.3078
## Ingreso 8.9451e-05 3.9046e-06 22.9089 <2e-16 ***
## Gasto -9.9934e-05 4.8388e-06 -20.6524 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1888.5
## Residual Sum of Squares: 1015.8
## R-Squared: 0.46209
## Adj. R-Squared: 0.46047
## Chisq: 855.626 on 3 DF, p-value: < 2.22e-16
# Hausman test: compares the random-effects fit (Wallace-Hussain) with the
# within (fixed-effects) fit.
phtest(x = walhus, x2 = within)
##
## Hausman Test
##
## data: Satisfacción ~ Miembros + Ingreso + Gasto + Ahorro
## chisq = 1.8826, df = 2, p-value = 0.3901
## alternative hypothesis: one model is inconsistent
# Total population per year: sum `population` over all rows sharing a `year`.
# group_by() + summarise() returns one row per year, sorted by year, which is
# the ordering the time series below relies on.
df2 <- df_population %>%
  group_by(year) %>%
  summarise(population = sum(population))

# Annual series (frequency = 1). The start year is read from the data instead
# of being hard-coded, so the time index and the forecast labels stay correct
# if the input file begins in a different year.
# NOTE(review): the object names `ts` and `arima` mask stats::ts() and
# stats::arima(); they are kept because later statements refer to `arima`.
ts <- ts(data = df2$population, start = min(df2$year), frequency = 1)

# Automatic ARIMA order selection on the annual series.
arima <- auto.arima(ts)
summary(arima)
## Series: ts
## ARIMA(0,2,2)
##
## Coefficients:
## ma1 ma2
## -0.6804 -0.1615
## s.e. 0.0927 0.0987
##
## sigma^2 = 1.102e+12: log likelihood = -1802.93
## AIC=3611.86 AICc=3612.07 BIC=3620.17
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 36421.56 1031987 442586.8 0.02677207 0.2636174 0.2058893 -0.011439
# Forecast the next 5 periods from the fitted ARIMA model, with a 95%
# prediction interval, and print the resulting table.
pronostico <- forecast(object = arima, h = 5, level = 95)
pronostico
## Point Forecast Lo 95 Hi 95
## 2020 330368874 328311638 332426110
## 2021 332609084 329202860 336015307
## 2022 334849293 330283643 339414944
## 2023 337089503 331417432 342761574
## 2024 339329713 332562452 346096974
# Draw the forecast object (point forecasts and interval) with a custom title.
plot(x = pronostico, main = "Población total anual")
# lavaan model syntax: three latent variables, each measured by three observed
# indicators, plus the three pairwise covariances between the latent
# variables. The "#Regresiones" and "#Intercepto" headers inside the string
# have no statements under them.
modelo <- '
#Regresiones
#Variables latentes
CalidadSuelo =~ SPH + NC + OM
CalidadAgua =~ CL + DO + WPH
SaludEcosistema =~ SD + BM + EP
#Varianzas y covarianzas
CalidadSuelo ~~ CalidadAgua
CalidadSuelo ~~ SaludEcosistema
CalidadAgua ~~ SaludEcosistema
#Intercepto
'

# Standardise every column with scale()'s defaults before fitting.
df3 <- scale(x = df_ecosistemas)

# Fit the confirmatory factor analysis on the standardised data.
# NOTE(review): the recorded run warns that the covariance matrix of the
# latent variables is not positive definite.
df4 <- cfa(model = modelo, data = df3)
## Warning: lavaan->lav_object_post_check():
## covariance matrix of latent variables is not positive definite ; use
## lavInspect(fit, "cov.lv") to investigate.
# Print the fit statistics and parameter estimates of the fitted CFA model.
summary(object = df4)
## lavaan 0.6-19 ended normally after 97 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 21
##
## Number of observations 200
##
## Model Test User Model:
##
## Test statistic 17.149
## Degrees of freedom 24
## P-value (Chi-square) 0.842
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## CalidadSuelo =~
## SPH 1.000
## NC 2.217 1.332 1.664 0.096
## OM 0.167 0.402 0.414 0.679
## CalidadAgua =~
## CL 1.000
## DO -0.827 0.427 -1.936 0.053
## WPH 0.404 0.359 1.124 0.261
## SaludEcosistema =~
## SD 1.000
## BM -1.899 3.995 -0.475 0.634
## EP -4.224 8.093 -0.522 0.602
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## CalidadSuelo ~~
## CalidadAgua -0.079 0.051 -1.558 0.119
## SaludEcosistem -0.010 0.020 -0.497 0.619
## CalidadAgua ~~
## SaludEcosistem -0.018 0.034 -0.513 0.608
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .SPH 0.926 0.103 9.016 0.000
## .NC 0.657 0.226 2.906 0.004
## .OM 0.993 0.100 9.978 0.000
## .CL 0.953 0.118 8.086 0.000
## .DO 0.966 0.108 8.977 0.000
## .WPH 0.988 0.099 9.938 0.000
## .SD 0.992 0.100 9.954 0.000
## .BM 0.985 0.102 9.618 0.000
## .EP 0.948 0.166 5.720 0.000
## CalidadSuelo 0.069 0.057 1.197 0.231
## CalidadAgua 0.042 0.075 0.558 0.577
## SaludEcosistem 0.003 0.012 0.221 0.825
# Path diagram of the fitted CFA model, showing coefficient estimates on the
# edges and the covariances between latent variables.
# Arguments are named in full: `coef`/`cov` only worked through partial
# argument matching, and naming `model` avoids relying on argument position.
lavaanPlot(model = df4, coefs = TRUE, covs = TRUE)