# Tema 3: Modelos de Ecuaciones Estructurales

###Calidad del Suelo:

SPH: pH del Suelo

NC: Contenido de Nutrientes

OM: Materia Orgánica

###Calidad del Agua:

CL: Niveles de Contaminantes

DO: Oxígeno Disuelto

WPH: pH del Agua

###Salud del Ecosistema

SD: Diversidad de Especies

BM: Biomasa

EP: Productividad del Ecosistema

#Cargar Base de datos y librerias
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# install.packages("gplots")
library(gplots)
## 
## Adjuntando el paquete: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess
# install.packages("plm")
library(plm)
## 
## Adjuntando el paquete: 'plm'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead
# install.packages("DataExplorer")
library(DataExplorer)

# install.packages("forecast")
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# install.packages("lavaan")
library(lavaan)
## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.
# install.packages("lavaanPlot")
library(lavaanPlot)
library(readr)
df <- read.csv("~/ecosistema.csv")
modelo <- '
  # Variables latentes
  Calidad_del_Suelo     =~ SPH + NC + OM
  Calidad_del_Agua      =~ CL  + DO + WPH
  Salud_del_Ecosistema  =~ SD  + BM + EP
  # Covarianzas
  Calidad_del_Suelo ~~ Calidad_del_Agua
  Salud_del_Ecosistema ~~ Calidad_del_Agua
  Salud_del_Ecosistema ~~ Calidad_del_Suelo
'

## Generar el diagrama

df3 <- scale(df)          
df4 <- cfa(modelo, df3)
## Warning: lavaan->lav_object_post_check():  
##    covariance matrix of latent variables is not positive definite ; use 
##    lavInspect(fit, "cov.lv") to investigate.
summary(df4)
## lavaan 0.6-19 ended normally after 97 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        21
## 
##   Number of observations                           200
## 
## Model Test User Model:
##                                                       
##   Test statistic                                17.149
##   Degrees of freedom                                24
##   P-value (Chi-square)                           0.842
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                           Estimate  Std.Err  z-value  P(>|z|)
##   Calidad_del_Suelo =~                                       
##     SPH                      1.000                           
##     NC                       2.217    1.332    1.664    0.096
##     OM                       0.167    0.402    0.414    0.679
##   Calidad_del_Agua =~                                        
##     CL                       1.000                           
##     DO                      -0.827    0.427   -1.936    0.053
##     WPH                      0.404    0.359    1.124    0.261
##   Salud_del_Ecosistema =~                                    
##     SD                       1.000                           
##     BM                      -1.899    3.995   -0.475    0.634
##     EP                      -4.224    8.093   -0.522    0.602
## 
## Covariances:
##                        Estimate  Std.Err  z-value  P(>|z|)
##   Calidad_del_Suelo ~~                                    
##     Calidad_del_Ag       -0.079    0.051   -1.558    0.119
##   Calidad_del_Agua ~~                                     
##     Sald_dl_Ecsstm       -0.018    0.034   -0.513    0.608
##   Calidad_del_Suelo ~~                                    
##     Sald_dl_Ecsstm       -0.010    0.020   -0.497    0.619
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .SPH               0.926    0.103    9.016    0.000
##    .NC                0.657    0.226    2.906    0.004
##    .OM                0.993    0.100    9.978    0.000
##    .CL                0.953    0.118    8.086    0.000
##    .DO                0.966    0.108    8.977    0.000
##    .WPH               0.988    0.099    9.938    0.000
##    .SD                0.992    0.100    9.954    0.000
##    .BM                0.985    0.102    9.618    0.000
##    .EP                0.948    0.166    5.720    0.000
##     Calidad_del_Sl    0.069    0.057    1.197    0.231
##     Calidad_del_Ag    0.042    0.075    0.558    0.577
##     Sald_dl_Ecsstm    0.003    0.012    0.221    0.825
lavaanPlot(df4, coef = TRUE, cov = TRUE)

# Actividad Hogares

library(readxl)
df1 <- read_excel("~/hogares.xlsx")
summary(df1)
##     HogarID            Año          Miembros       Ingreso          Gasto      
##  Min.   :  1.00   Min.   :2010   Min.   :2.00   Min.   : 5039   Min.   : 3273  
##  1st Qu.: 25.75   1st Qu.:2012   1st Qu.:3.00   1st Qu.:21198   1st Qu.:14474  
##  Median : 50.50   Median :2014   Median :4.00   Median :26854   Median :20135  
##  Mean   : 50.50   Mean   :2014   Mean   :3.52   Mean   :27066   Mean   :19879  
##  3rd Qu.: 75.25   3rd Qu.:2017   3rd Qu.:4.25   3rd Qu.:32627   3rd Qu.:25113  
##  Max.   :100.00   Max.   :2019   Max.   :5.00   Max.   :50129   Max.   :34753  
##      Ahorro          Satisfacción   
##  Min.   :-23306.6   Min.   : 1.157  
##  1st Qu.:   304.7   1st Qu.: 4.796  
##  Median :  6608.9   Median : 5.667  
##  Mean   :  7186.3   Mean   : 5.697  
##  3rd Qu.: 13555.9   3rd Qu.: 6.607  
##  Max.   : 38655.5   Max.   :10.000
# Generar conjunto de datos de panel (índices correctos)
panel_1 <- pdata.frame(df1, index = c("HogarID","Año"))
head(panel_1)
##        HogarID  Año Miembros  Ingreso    Gasto    Ahorro Satisfacción
## 1-2010       1 2010        2 41372.72 23948.71 17424.005     5.544594
## 1-2011       1 2011        2 49362.29 25511.14 23851.155     6.456287
## 1-2012       1 2012        2 34227.85 24558.43  9669.414     5.484323
## 1-2013       1 2013        2 39474.82 26494.29 12980.538     6.416048
## 1-2014       1 2014        2 45965.89 30411.14 15554.746     4.116369
## 1-2015       1 2015        2 41671.21 25041.87 16629.338     6.499067

## Prueba de heterogenidad

plotmeans(Ingreso ~ HogarID,
          main = "Prueba de Heterogeneidad entre hogares para el ingreso",
          data = panel_1)

## Modelos de panel con las variables de hogares

# Modelo 1. Regresión agrupada (pooled)
pooled <- plm(Ingreso ~ Satisfacción, data = panel_1, model = "pooling")
summary(pooled)
## Pooling Model
## 
## Call:
## plm(formula = Ingreso ~ Satisfacción, data = panel_1, model = "pooling")
## 
## Balanced Panel: n = 100, T = 10, N = 1000
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -20196.53  -5106.46   -575.98   5095.02  23468.66 
## 
## Coefficients:
##              Estimate Std. Error t-value  Pr(>|t|)    
## (Intercept)  10597.75     976.80  10.850 < 2.2e-16 ***
## Satisfacción  2890.77     166.68  17.343 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    6.8145e+10
## Residual Sum of Squares: 5.2364e+10
## R-Squared:      0.23158
## Adj. R-Squared: 0.23081
## F-statistic: 300.772 on 1 and 998 DF, p-value: < 2.22e-16
# Modelo 2. Efectos Fijos (within)
within <- plm(Ingreso ~ Satisfacción, data = panel_1, model = "within")
summary(within)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = Ingreso ~ Satisfacción, data = panel_1, model = "within")
## 
## Balanced Panel: n = 100, T = 10, N = 1000
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -15591.951  -3123.123    -74.284   3010.168  13134.979 
## 
## Coefficients:
##              Estimate Std. Error t-value  Pr(>|t|)    
## Satisfacción  1698.14     132.73  12.794 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    2.3013e+10
## Residual Sum of Squares: 1.9469e+10
## R-Squared:      0.15403
## Adj. R-Squared: 0.05993
## F-statistic: 163.687 on 1 and 899 DF, p-value: < 2.22e-16
# Prueba F (pooled vs within)
pFtest(within, pooled)
## 
##  F test for individual effects
## 
## data:  Ingreso ~ Satisfacción
## F = 15.343, df1 = 99, df2 = 899, p-value < 2.2e-16
## alternative hypothesis: significant effects

# Modelos de efectos aleatorios

# Efectos aleatorios
walhus  <- plm(Ingreso ~ Satisfacción, data = panel_1, model = "random", random.method = "walhus")
amemiya <- plm(Ingreso ~ Satisfacción, data = panel_1, model = "random", random.method = "amemiya")
nerlove <- plm(Ingreso ~ Satisfacción, data = panel_1, model = "random", random.method = "nerlove")

summary(walhus); summary(amemiya); summary(nerlove)
## Oneway (individual) effect Random Effect Model 
##    (Wallace-Hussain's transformation)
## 
## Call:
## plm(formula = Ingreso ~ Satisfacción, data = panel_1, model = "random", 
##     random.method = "walhus")
## 
## Balanced Panel: n = 100, T = 10, N = 1000
## 
## Effects:
##                    var  std.dev share
## idiosyncratic 23574420     4855  0.45
## individual    28789336     5366  0.55
## theta: 0.7249
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -16507.33  -3220.23   -147.96   3184.91  15215.46 
## 
## Coefficients:
##              Estimate Std. Error z-value  Pr(>|z|)    
## (Intercept)  16632.69     925.15  17.978 < 2.2e-16 ***
## Satisfacción  1831.41     131.69  13.907 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    2.6429e+10
## Residual Sum of Squares: 2.2139e+10
## R-Squared:      0.16233
## Adj. R-Squared: 0.16149
## Chisq: 193.404 on 1 DF, p-value: < 2.22e-16
## Oneway (individual) effect Random Effect Model 
##    (Amemiya's transformation)
## 
## Call:
## plm(formula = Ingreso ~ Satisfacción, data = panel_1, model = "random", 
##     random.method = "amemiya")
## 
## Balanced Panel: n = 100, T = 10, N = 1000
## 
## Effects:
##                    var  std.dev share
## idiosyncratic 21631698     4651 0.393
## individual    33418160     5781 0.607
## theta: 0.7534
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -16370.54  -3188.47   -210.78   3188.52  14905.18 
## 
## Coefficients:
##              Estimate Std. Error z-value  Pr(>|z|)    
## (Intercept)  16777.35     953.98  17.587 < 2.2e-16 ***
## Satisfacción  1806.01     130.63  13.825 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    2.5757e+10
## Residual Sum of Squares: 2.1617e+10
## R-Squared:      0.16074
## Adj. R-Squared: 0.1599
## Chisq: 191.14 on 1 DF, p-value: < 2.22e-16
## Oneway (individual) effect Random Effect Model 
##    (Nerlove's transformation)
## 
## Call:
## plm(formula = Ingreso ~ Satisfacción, data = panel_1, model = "random", 
##     random.method = "nerlove")
## 
## Balanced Panel: n = 100, T = 10, N = 1000
## 
## Effects:
##                    var  std.dev share
## idiosyncratic 19468528     4412 0.351
## individual    35940737     5995 0.649
## theta: 0.7733
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -16275.51  -3113.76   -212.49   3188.29  14690.19 
## 
## Coefficients:
##              Estimate Std. Error z-value  Pr(>|z|)    
## (Intercept)  16869.92     981.37  17.190 < 2.2e-16 ***
## Satisfacción  1789.76     129.95  13.773 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    2.5332e+10
## Residual Sum of Squares: 2.1286e+10
## R-Squared:      0.15972
## Adj. R-Squared: 0.15888
## Chisq: 189.701 on 1 DF, p-value: < 2.22e-16
# Hausman (fixed vs random). Usar 'swar' suele ser estable para la prueba:
random_swar <- plm(Ingreso ~ Satisfacción, data = panel_1, model = "random", random.method = "swar")
phtest(within, random_swar)
## 
##  Hausman Test
## 
## data:  Ingreso ~ Satisfacción
## chisq = 67.193, df = 1, p-value = 2.462e-16
## alternative hypothesis: one model is inconsistent

# Estados de Mexico

# devtools::install_github("diegovalle/mxmaps")
# library(mxmaps)
# df2 <- df_mxstate_2020
# df_mxstate_2020$value <- df2$pop   # reemplaza con tus valores
# mxstate_choropleth(df_mxstate_2020)

# Importar base de datos

df3 <- read.csv("~/population.csv")
df2 <- read.csv("~/population.csv")
df3 <- df2 %>% dplyr::filter(state == "TX")
ts_tx <- ts(df3$population, start = 1900, frequency = 1)

arima_tx <- auto.arima(ts_tx)
pronostico <- forecast(arima_tx, level = c(95), h = 31)
pronostico
##      Point Forecast    Lo 95    Hi 95
## 2020       29398472 29199487 29597457
## 2021       29806827 29463665 30149990
## 2022       30215183 29742956 30687410
## 2023       30623538 30024100 31222977
## 2024       31031894 30303359 31760429
## 2025       31440249 30579246 32301253
## 2026       31848605 30851090 32846119
## 2027       32256960 31118581 33395339
## 2028       32665316 31381587 33949044
## 2029       33073671 31640070 34507272
## 2030       33482027 31894047 35070007
## 2031       33890382 32143561 35637204
## 2032       34298738 32388674 36208801
## 2033       34707093 32629456 36784730
## 2034       35115449 32865983 37364914
## 2035       35523804 33098330 37949278
## 2036       35932160 33326573 38537746
## 2037       36340515 33550788 39130242
## 2038       36748871 33771046 39726695
## 2039       37157226 33987418 40327034
## 2040       37565581 34199972 40931191
## 2041       37973937 34408774 41539100
## 2042       38382292 34613887 42150698
## 2043       38790648 34815371 42765925
## 2044       39199003 35013284 43384723
## 2045       39607359 35207682 44007036
## 2046       40015714 35398618 44632810
## 2047       40424070 35586145 45261995
## 2048       40832425 35770311 45894540
## 2049       41240781 35951163 46530399
## 2050       41649136 36128748 47169524
plot(pronostico, main = "Población en Texas")