#RMD resumen sin la limpieza o el analisis bivariado #Data

library(rio)
l="https://github.com/AriannaNKZC/Estad-2/raw/master/avecita.csv"
data=import(l)

#analisis exploratorio (EFA)

#librerias

library(htmltab)
library(stringr)
library(polycor)
library(ggcorrplot)

## Loading required package: ggplot2

library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

## The following object is masked from 'package:polycor':
## 
##     polyserial

library(matrixcalc)
library(GPArotation)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:rio':
## 
##     export

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(fpc)
library(cluster)
library(dbscan)

## 
## Attaching package: 'dbscan'

## The following object is masked from 'package:fpc':
## 
##     dbscan

library(BBmisc)

## 
## Attaching package: 'BBmisc'

## The following object is masked from 'package:base':
## 
##     isFALSE

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:BBmisc':
## 
##     coalesce, collapse

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

#Regresion poisson Renombrando variables

DataR=data
names(DataR)[15]="Contagiados"
str(DataR)

## 'data.frame':    124 obs. of  21 variables:
##  $ Country                : chr  "Afghanistan" "Angola" "Andorra" "United Arab Emirates" ...
##  $ HDI                    : num  0.496 0.575 0.857 0.866 0.83 ...
##  $ EXPECTATIVAVIDA        : num  64.5 60.8 81.8 77.8 76.5 ...
##  $ EXPECTCOLE             : num  10.1 11.8 13.3 13.6 17.6 ...
##  $ YEARS_SCHOOLING        : num  3.93 5.13 10.16 10.95 10.56 ...
##  $ GNI_GROSSNATIONALINCOME: num  1746 5555 48641 66912 17611 ...
##  $ infoalawk              : int  2 1 0 0 2 2 2 2 2 2 ...
##  $ %poburb18              : num  25.5 65.5 88.1 86.5 91.9 ...
##  $ GS_2017                : num  11.78 2.79 10.32 3.33 9.12 ...
##  $ PPP_2018               : num  524 3290 41793 43839 11684 ...
##  $ StringencyIndex        : num  27.78 33.33 0 2.78 11.11 ...
##  $ Apoyo Economico        : chr  "Sin apoyo" "Sin apoyo" "Sin apoyo" "Sin apoyo" ...
##  $ Densidad (2018)        : num  56.9 24.7 163.8 135.6 16.3 ...
##  $ Desempleo (% al 2019)  : int  24 7 4 2 8 6 6 5 7 1 ...
##  $ Contagiados            : int  16509 259 852 16240 25987 6847 16771 7876 53981 850 ...
##  $ Regulatory_quality     : num  -1.121 -0.894 1.228 0.979 -0.493 ...
##  $ control_co             : num  -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
##  $ Ruleoflaw              : num  -1.714 -1.054 1.58 0.84 -0.431 ...
##  $ Voice_acco             : num  -0.988 -0.777 1.139 -1.122 0.6 ...
##  $ Political_sta          : num  -2.649 -0.311 1.615 0.703 -0.12 ...
##  $ GEE                    : num  -1.457 -1.052 1.945 1.431 0.026 ...

DataR$Country=NULL

DataR$`Apoyo Economico` = as.factor(DataR$`Apoyo Economico`)
levels(DataR$`Apoyo Economico`) <- c("Sin apoyo", "Menos del 50% del sueldo", "Más del 50% del sueldo")
DataR$`Apoyo Economico` = as.ordered(DataR$`Apoyo Economico`)
table(DataR$`Apoyo Economico`)

## 
##                Sin apoyo Menos del 50% del sueldo 
##                        7                      117

DataR$infoalawk=as.ordered(DataR$infoalawk)
DataR$`Desempleo (% al 2019)`=as.numeric(DataR$`Desempleo (% al 2019)`)
DataR$Contagiados=as.numeric(DataR$Contagiados)
DataR$HDI=NULL

Regresion en si

Contagiados=glm(Contagiados~.,family = poisson, data=DataR)
summary(Contagiados)

## 
## Call:
## glm(formula = Contagiados ~ ., family = poisson, data = DataR)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -431.56  -147.23   -67.36    42.61   813.02  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              7.716e-01  1.395e-02   55.31   <2e-16 ***
## EXPECTATIVAVIDA          1.017e-01  2.107e-04  482.75   <2e-16 ***
## EXPECTCOLE               1.081e-01  3.863e-04  279.77   <2e-16 ***
## YEARS_SCHOOLING         -2.049e-01  4.064e-04 -504.15   <2e-16 ***
## GNI_GROSSNATIONALINCOME  3.971e-05  8.348e-08  475.62   <2e-16 ***
## infoalawk.L              6.185e-01  1.424e-03  434.33   <2e-16 ***
## infoalawk.Q              1.953e-01  1.545e-03  126.41   <2e-16 ***
## `%poburb18`              1.343e-02  4.448e-05  302.02   <2e-16 ***
## GS_2017                  1.119e-01  3.476e-04  321.84   <2e-16 ***
## PPP_2018                -4.794e-05  9.399e-08 -510.10   <2e-16 ***
## StringencyIndex         -2.466e-02  4.722e-05 -522.17   <2e-16 ***
## `Apoyo Economico`.L      8.868e-01  4.637e-03  191.22   <2e-16 ***
## `Densidad (2018)`       -2.554e-04  1.198e-06 -213.22   <2e-16 ***
## `Desempleo (% al 2019)` -1.394e-02  9.203e-05 -151.49   <2e-16 ***
## Regulatory_quality       1.396e+00  2.400e-03  581.65   <2e-16 ***
## control_co               3.619e-01  2.505e-03  144.49   <2e-16 ***
## Ruleoflaw               -5.516e-01  3.039e-03 -181.53   <2e-16 ***
## Voice_acco               8.178e-02  1.494e-03   54.73   <2e-16 ***
## Political_sta           -7.616e-01  1.186e-03 -641.92   <2e-16 ***
## GEE                     -7.715e-01  2.987e-03 -258.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 9737350  on 123  degrees of freedom
## Residual deviance: 4987598  on 104  degrees of freedom
## AIC: 4988921
## 
## Number of Fisher Scoring iterations: 7

exp(confint(Contagiados))

## Waiting for profiling to be done...

##                             2.5 %    97.5 %
## (Intercept)             2.1047857 2.2230810
## EXPECTATIVAVIDA         1.1066131 1.1075275
## EXPECTCOLE              1.1132745 1.1149613
## YEARS_SCHOOLING         0.8140994 0.8153973
## GNI_GROSSNATIONALINCOME 1.0000395 1.0000399
## infoalawk.L             1.8508981 1.8612584
## infoalawk.Q             1.2120421 1.2194059
## `%poburb18`             1.0134357 1.0136124
## GS_2017                 1.1176020 1.1191258
## PPP_2018                0.9999519 0.9999522
## StringencyIndex         0.9755522 0.9757328
## `Apoyo Economico`.L     2.4053523 2.4494777
## `Densidad (2018)`       0.9997422 0.9997469
## `Desempleo (% al 2019)` 0.9859772 0.9863330
## Regulatory_quality      4.0188266 4.0568060
## control_co              1.4290591 1.4431605
## Ruleoflaw               0.5725934 0.5794548
## Voice_acco              1.0820453 1.0884015
## Political_sta           0.4658261 0.4679976
## GEE                     0.4596397 0.4650523

library(AER)

## Loading required package: car

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

## The following object is masked from 'package:psych':
## 
##     logit

## Loading required package: lmtest

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: sandwich

## Loading required package: survival

AER::dispersiontest(Contagiados,trafo=1) #significativa

## 
##  Overdispersion test
## 
## data:  Contagiados
## z = 3.9754, p-value = 3.514e-05
## alternative hypothesis: true alpha is greater than 0
## sample estimates:
##    alpha 
## 55084.63

Calculemos matriz de correlación:

theData = data
names(data)

##  [1] "Country"                 "HDI"                    
##  [3] "EXPECTATIVAVIDA"         "EXPECTCOLE"             
##  [5] "YEARS_SCHOOLING"         "GNI_GROSSNATIONALINCOME"
##  [7] "infoalawk"               "%poburb18"              
##  [9] "GS_2017"                 "PPP_2018"               
## [11] "StringencyIndex"         "Apoyo Economico"        
## [13] "Densidad (2018)"         "Desempleo (% al 2019)"  
## [15] "Contagd100"              "Regulatory_quality"     
## [17] "control_co"              "Ruleoflaw"              
## [19] "Voice_acco"              "Political_sta"          
## [21] "GEE"

data$PAIS = NULL
theData = (data[, c(2, 7:14, 16:21)])
theData$`Apoyo Economico` = as.factor(theData$`Apoyo Economico`)

theData$`Apoyo Economico` = as.numeric(theData$`Apoyo Economico`)
theData$`Apoyo Economico`=recode(theData$`Apoyo Economico`, "1=2; 2=1", as.factor = F)

table(theData$`Apoyo Economico`)

## 
##   1   2 
## 117   7

theData$infoalawk = as.numeric(theData$infoalawk)
table(theData$infoalawk)

## 
##  0  1  2 
## 18 19 87

str(theData)

## 'data.frame':    124 obs. of  15 variables:
##  $ HDI                  : num  0.496 0.575 0.857 0.866 0.83 ...
##  $ infoalawk            : num  2 1 0 0 2 2 2 2 2 2 ...
##  $ %poburb18            : num  25.5 65.5 88.1 86.5 91.9 ...
##  $ GS_2017              : num  11.78 2.79 10.32 3.33 9.12 ...
##  $ PPP_2018             : num  524 3290 41793 43839 11684 ...
##  $ StringencyIndex      : num  27.78 33.33 0 2.78 11.11 ...
##  $ Apoyo Economico      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Densidad (2018)      : num  56.9 24.7 163.8 135.6 16.3 ...
##  $ Desempleo (% al 2019): int  24 7 4 2 8 6 6 5 7 1 ...
##  $ Regulatory_quality   : num  -1.121 -0.894 1.228 0.979 -0.493 ...
##  $ control_co           : num  -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
##  $ Ruleoflaw            : num  -1.714 -1.054 1.58 0.84 -0.431 ...
##  $ Voice_acco           : num  -0.988 -0.777 1.139 -1.122 0.6 ...
##  $ Political_sta        : num  -2.649 -0.311 1.615 0.703 -0.12 ...
##  $ GEE                  : num  -1.457 -1.052 1.945 1.431 0.026 ...

#cambiando a nombres más bonitos

names(theData) = c("IDH", "Campañas informativas", "Población urbana (% al 2018)", "Gasto en Salud (2017)", "PBI per cápita (2018)", "Indice de Rigurosidad", "Apoyo Economico", "Densidad (2018)", "Desempleo (% al 2019)","RegulatoryQual","control_co", "ruleOflaw", "VoiceAccoutnability","EstabilidadPolitica","GEE")

corMatrix=polycor::hetcor(theData)$correlations

Explorar correlaciones:

ggcorrplot(corMatrix)

#evaluandos ignificancia
ggcorrplot(corMatrix,
          p.mat = cor_pmat(corMatrix),
          insig = "blank",
          title = "Gráfico 1: Matriz de correlación")

verificar si los datos se pueden factorizar

psych::KMO(corMatrix)

## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA =  0.88
## MSA for each item = 
##                          IDH        Campañas informativas 
##                         0.86                         0.43 
## Población urbana (% al 2018)        Gasto en Salud (2017) 
##                         0.87                         0.90 
##        PBI per cápita (2018)        Indice de Rigurosidad 
##                         0.96                         0.74 
##              Apoyo Economico              Densidad (2018) 
##                         0.38                         0.62 
##        Desempleo (% al 2019)               RegulatoryQual 
##                         0.80                         0.89 
##                   control_co                    ruleOflaw 
##                         0.89                         0.92 
##          VoiceAccoutnability          EstabilidadPolitica 
##                         0.87                         0.90 
##                          GEE 
##                         0.90

Verificar si la matriz de correlaciones es adecuada

cortest.bartlett(corMatrix,n=nrow(theData))$p.value>0.05

## [1] FALSE

library(matrixcalc)
is.singular.matrix(corMatrix)

## [1] FALSE

determinar en cuantos factores o variables latentes podriamos redimensionar la data

fa.parallel(theData, fm = 'ML', fa = 'fa')

## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA

#instalar 'parameters' y 'n_factors'
library(parameters)
library(nFactors)

## Loading required package: lattice

## 
## Attaching package: 'nFactors'

## The following object is masked from 'package:lattice':
## 
##     parallel

library(see)
sugerencia=parameters::n_factors(corMatrix)

#instalar 'see'
# tenemos:
plot(sugerencia)

6. Redimensionar a numero menor de factores * Resultado inicial:

resfa <- fa(theData,nfactors = 2,cor = 'mixed',rotate ="varimax",fm="minres")

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully

## mixed.cor is deprecated, please use mixedCor.
print(resfa$loadings,cutoff = 0.5)

## 
## Loadings:
##                              MR1    MR2   
## IDH                           0.827       
## Campañas informativas                     
## Población urbana (% al 2018)  0.598       
## Gasto en Salud (2017)                     
## PBI per cápita (2018)         0.819       
## Indice de Rigurosidad                0.973
## Apoyo Economico                           
## Densidad (2018)                           
## Desempleo (% al 2019)                     
## RegulatoryQual                0.942       
## control_co                    0.941       
## ruleOflaw                     0.957       
## VoiceAccoutnability           0.771       
## EstabilidadPolitica           0.816       
## GEE                           0.942       
## 
##                  MR1   MR2
## SS loadings    6.860 1.741
## Proportion Var 0.457 0.116
## Cumulative Var 0.457 0.573

resultado visual

fa.diagram(resfa, main = c("Gráfico 2: Árbol de factorización del primer modelo"))

Evaluando Resultado obtenido: ¿La Raíz del error cuadrático medio corregida está cerca a cero?

resfa$crms #si

## [1] 0.06416853

¿La Raíz del error cuadrático medio de aproximación es menor a 0.05?

resfa$RMSEA #no pasa

##      RMSEA      lower      upper confidence 
##  0.1475114  0.1300374  0.1670539  0.9000000

¿El índice de Tucker-Lewis es mayor a 0.9?

resfa$TLI #0.82

## [1] 0.8261077

¿Qué variables aportaron mas a los factores?

sort(resfa$communality)

##              Densidad (2018)        Desempleo (% al 2019) 
##                   0.02764488                   0.07529211 
##        Gasto en Salud (2017)        Campañas informativas 
##                   0.18374782                   0.23984585 
##              Apoyo Economico Población urbana (% al 2018) 
##                   0.24646787                   0.41566714 
##          VoiceAccoutnability          EstabilidadPolitica 
##                   0.59675656                   0.66828487 
##        PBI per cápita (2018)                          IDH 
##                   0.71870553                   0.73985075 
##                   control_co               RegulatoryQual 
##                   0.89421306                   0.90861209 
##                          GEE                    ruleOflaw 
##                   0.94065178                   0.94474044 
##        Indice de Rigurosidad 
##                   1.00064695

¿Qué variables contribuyen a mas de un factor? #conviene que salga 1

sort(resfa$complexity) #desempleo

##              Apoyo Economico          EstabilidadPolitica 
##                     1.002875                     1.006240 
##          VoiceAccoutnability        Campañas informativas 
##                     1.008069                     1.018266 
##                   control_co        Gasto en Salud (2017) 
##                     1.020791                     1.025106 
##               RegulatoryQual                    ruleOflaw 
##                     1.048192                     1.061428 
##        Indice de Rigurosidad                          GEE 
##                     1.114654                     1.121439 
##        PBI per cápita (2018)                          IDH 
##                     1.140260                     1.162113 
##              Densidad (2018) Población urbana (% al 2018) 
##                     1.162977                     1.316069 
##        Desempleo (% al 2019) 
##                     1.891938

factorial_casos<-as.data.frame(resfa$scores) 
head(factorial_casos)

summary(factorial_casos)

##       MR1               MR2         
##  Min.   :-2.1983   Min.   :-1.9845  
##  1st Qu.:-0.7568   1st Qu.:-0.6991  
##  Median :-0.1731   Median :-0.2371  
##  Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.7598   3rd Qu.: 0.6143  
##  Max.   : 1.9421   Max.   : 3.2857

#Alternativa 2 Calculemos matriz de correlación:

laData = data
laData = (data[, c(2, 7:14, 16:21)])
names(laData)

##  [1] "HDI"                   "infoalawk"             "%poburb18"            
##  [4] "GS_2017"               "PPP_2018"              "StringencyIndex"      
##  [7] "Apoyo Economico"       "Densidad (2018)"       "Desempleo (% al 2019)"
## [10] "Regulatory_quality"    "control_co"            "Ruleoflaw"            
## [13] "Voice_acco"            "Political_sta"         "GEE"

laData$`Apoyo Economico` = as.factor(laData$`Apoyo Economico`)

laData$`Apoyo Economico` = as.numeric(laData$`Apoyo Economico`)
laData$`Apoyo Economico`=recode(laData$`Apoyo Economico`, "1=2; 2=1", as.factor = F)

laData$infoalawk = as.numeric(laData$infoalawk)
table(laData$`Apoyo Economico`)

## 
##   1   2 
## 117   7

str(laData)

## 'data.frame':    124 obs. of  15 variables:
##  $ HDI                  : num  0.496 0.575 0.857 0.866 0.83 ...
##  $ infoalawk            : num  2 1 0 0 2 2 2 2 2 2 ...
##  $ %poburb18            : num  25.5 65.5 88.1 86.5 91.9 ...
##  $ GS_2017              : num  11.78 2.79 10.32 3.33 9.12 ...
##  $ PPP_2018             : num  524 3290 41793 43839 11684 ...
##  $ StringencyIndex      : num  27.78 33.33 0 2.78 11.11 ...
##  $ Apoyo Economico      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Densidad (2018)      : num  56.9 24.7 163.8 135.6 16.3 ...
##  $ Desempleo (% al 2019): int  24 7 4 2 8 6 6 5 7 1 ...
##  $ Regulatory_quality   : num  -1.121 -0.894 1.228 0.979 -0.493 ...
##  $ control_co           : num  -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
##  $ Ruleoflaw            : num  -1.714 -1.054 1.58 0.84 -0.431 ...
##  $ Voice_acco           : num  -0.988 -0.777 1.139 -1.122 0.6 ...
##  $ Political_sta        : num  -2.649 -0.311 1.615 0.703 -0.12 ...
##  $ GEE                  : num  -1.457 -1.052 1.945 1.431 0.026 ...

#trabajando poburabano
laData$`%poburb18` = NULL

corMatrixw=polycor::hetcor(laData)$correlations

Explorar correlaciones:

ggcorrplot(corMatrixw)

#evaluandos ignificancia
ggcorrplot(corMatrixw,
          p.mat = cor_pmat(corMatrixw),
          insig = "blank",
          title = "Gráfico 5: Matriz de correlación")

verificar si los datos se pueden factorizar

psych::KMO(corMatrixw)

## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrixw)
## Overall MSA =  0.88
## MSA for each item = 
##                   HDI             infoalawk               GS_2017 
##                  0.90                  0.44                  0.88 
##              PPP_2018       StringencyIndex       Apoyo Economico 
##                  0.95                  0.72                  0.40 
##       Densidad (2018) Desempleo (% al 2019)    Regulatory_quality 
##                  0.61                  0.81                  0.88 
##            control_co             Ruleoflaw            Voice_acco 
##                  0.90                  0.91                  0.86 
##         Political_sta                   GEE 
##                  0.89                  0.89

Verificar si la matriz de correlaciones es adecuada

cortest.bartlett(corMatrixw,n=nrow(laData))$p.value>0.05

## [1] FALSE

is.singular.matrix(corMatrixw)

## [1] FALSE

determinar en cuantos factores o variables latentes podriamos redimensionar la data

fa.parallel(laData, fm = 'ML', fa = 'fa')

## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA

#instalar 'parameters' y 'n_factors'
sugerencia1=parameters::n_factors(corMatrixw)
#instalar 'see'
# tenemos:
plot(sugerencia1)

6. Redimensionar a numero menor de factores * Resultado inicial:

resfa_2 <- fa(laData,nfactors = 2,cor = 'mixed',rotate ="varimax",fm="minres")

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully

## mixed.cor is deprecated, please use mixedCor.
print(resfa_2$loadings,cutoff = 0.5)

## 
## Loadings:
##                       MR1    MR2   
## HDI                    0.807       
## infoalawk                          
## GS_2017                            
## PPP_2018               0.813       
## StringencyIndex               0.977
## Apoyo Economico                    
## Densidad (2018)                    
## Desempleo (% al 2019)              
## Regulatory_quality     0.943       
## control_co             0.946       
## Ruleoflaw              0.964       
## Voice_acco             0.777       
## Political_sta          0.819       
## GEE                    0.944       
## 
##                  MR1   MR2
## SS loadings    6.498 1.690
## Proportion Var 0.464 0.121
## Cumulative Var 0.464 0.585

resultado visual

fa.diagram(resfa_2, main = c("Gráfico 2: Árbol de factorización del primer modelo"))

Evaluando Resultado obtenido: ¿La Raíz del error cuadrático medio corregida está cerca a cero?

resfa_2$crms #si

## [1] 0.06244545

¿La Raíz del error cuadrático medio de aproximación es menor a 0.05?

resfa_2$RMSEA #no

##      RMSEA      lower      upper confidence 
##  0.1385199  0.1191850  0.1599397  0.9000000

¿El índice de Tucker-Lewis es mayor a 0.9?

resfa_2$TLI #0.85

## [1] 0.8590452

¿Qué variables aportaron mas a los factores?

sort(resfa_2$communality)

##       Densidad (2018) Desempleo (% al 2019)               GS_2017 
##            0.02614604            0.07134946            0.18243492 
##             infoalawk       Apoyo Economico            Voice_acco 
##            0.23932166            0.25013174            0.60546233 
##         Political_sta                   HDI              PPP_2018 
##            0.67232630            0.69956305            0.70735966 
##            control_co    Regulatory_quality                   GEE 
##            0.90542534            0.91104046            0.94746452 
##             Ruleoflaw       StringencyIndex 
##            0.95960930            1.01025090

¿Qué variables contribuyen a mas de un factor? #conviene que salga 1

sort(resfa_2$complexity)

##       Apoyo Economico         Political_sta            Voice_acco 
##              1.002896              1.005226              1.005356 
##             infoalawk            control_co               GS_2017 
##              1.017275              1.023719              1.026774 
##    Regulatory_quality             Ruleoflaw       StringencyIndex 
##              1.050215              1.066870              1.114701 
##                   GEE              PPP_2018       Densidad (2018) 
##              1.125615              1.137687              1.138584 
##                   HDI Desempleo (% al 2019) 
##              1.145359              1.874363

factorial_casos2<-as.data.frame(resfa_2$scores) #en esta no me sale el factorial
head(factorial_casos2)

summary(factorial_casos2)

##       MR1               MR2         
##  Min.   :-2.1445   Min.   :-2.1021  
##  1st Qu.:-0.7387   1st Qu.:-0.7215  
##  Median :-0.1786   Median :-0.2346  
##  Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.7666   3rd Qu.: 0.6181  
##  Max.   : 1.9603   Max.   : 3.3280

#Mergear factoriales

ABCD=cbind(data[1],as.data.frame(resfa$scores))

data$capacidad=normalize(ABCD$MR1, 
                       method = "range", 
                       margin=2, # by column
                       range = c(0, 10))
data$medidas=normalize(ABCD$MR2, 
                       method = "range", 
                       margin=2, # by column
                       range = c(0, 10))

CCMA=cbind(data[1],as.data.frame(resfa_2$scores))

data$CAPACIDADsinURB_ABC= normalize(CCMA$MR1, 
                       method = "range", 
                       margin=2, # by column
                       range = c(0, 10))
data$MEDIDAS2_ABC=normalize(CCMA$MR2, 
                       method = "range", 
                       margin=2, # by column
                       range = c(0, 10))

regresión posison

library(ggpubr) #gráfico para ver normalidad
library(scatterplot3d)
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

library(lmtest)

Sin utilizar factoriales

#infoawk (o como se escriba)

str(data$infoalawk)

##  int [1:124] 2 1 0 0 2 2 2 2 2 2 ...

data$infoalawk = as.ordered(data$infoalawk)
levels(data$infoalawk) = c("Ninguna", "Campañas del gobierno", "Campañas integrales")
table(data$infoalawk)

## 
##               Ninguna Campañas del gobierno   Campañas integrales 
##                    18                    19                    87

#apoyo economico

data$`Apoyo Economico` = as.factor(data$`Apoyo Economico`)
levels(data$`Apoyo Economico`) <- c("Sin apoyo", "Menos del 50% del sueldo", "Más del 50% del sueldo")
data$`Apoyo Economico` = as.ordered(data$`Apoyo Economico`)
table(data$`Apoyo Economico`)

## 
##                Sin apoyo Menos del 50% del sueldo 
##                        7                      117

names(data)

##  [1] "Country"                 "HDI"                    
##  [3] "EXPECTATIVAVIDA"         "EXPECTCOLE"             
##  [5] "YEARS_SCHOOLING"         "GNI_GROSSNATIONALINCOME"
##  [7] "infoalawk"               "%poburb18"              
##  [9] "GS_2017"                 "PPP_2018"               
## [11] "StringencyIndex"         "Apoyo Economico"        
## [13] "Densidad (2018)"         "Desempleo (% al 2019)"  
## [15] "Contagd100"              "Regulatory_quality"     
## [17] "control_co"              "Ruleoflaw"              
## [19] "Voice_acco"              "Political_sta"          
## [21] "GEE"                     "capacidad"              
## [23] "medidas"                 "CAPACIDADsinURB_ABC"    
## [25] "MEDIDAS2_ABC"

data_regre = data
data_regre$Country = NULL
str(data_regre)

## 'data.frame':    124 obs. of  24 variables:
##  $ HDI                    : num  0.496 0.575 0.857 0.866 0.83 ...
##  $ EXPECTATIVAVIDA        : num  64.5 60.8 81.8 77.8 76.5 ...
##  $ EXPECTCOLE             : num  10.1 11.8 13.3 13.6 17.6 ...
##  $ YEARS_SCHOOLING        : num  3.93 5.13 10.16 10.95 10.56 ...
##  $ GNI_GROSSNATIONALINCOME: num  1746 5555 48641 66912 17611 ...
##  $ infoalawk              : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 1 1 3 3 3 3 3 3 ...
##  $ %poburb18              : num  25.5 65.5 88.1 86.5 91.9 ...
##  $ GS_2017                : num  11.78 2.79 10.32 3.33 9.12 ...
##  $ PPP_2018               : num  524 3290 41793 43839 11684 ...
##  $ StringencyIndex        : num  27.78 33.33 0 2.78 11.11 ...
##  $ Apoyo Economico        : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Densidad (2018)        : num  56.9 24.7 163.8 135.6 16.3 ...
##  $ Desempleo (% al 2019)  : int  24 7 4 2 8 6 6 5 7 1 ...
##  $ Contagd100             : int  16509 259 852 16240 25987 6847 16771 7876 53981 850 ...
##  $ Regulatory_quality     : num  -1.121 -0.894 1.228 0.979 -0.493 ...
##  $ control_co             : num  -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
##  $ Ruleoflaw              : num  -1.714 -1.054 1.58 0.84 -0.431 ...
##  $ Voice_acco             : num  -0.988 -0.777 1.139 -1.122 0.6 ...
##  $ Political_sta          : num  -2.649 -0.311 1.615 0.703 -0.12 ...
##  $ GEE                    : num  -1.457 -1.052 1.945 1.431 0.026 ...
##  $ capacidad              : num  1.03 2.42 8.3 7.26 4.4 ...
##  $ medidas                : num  3.054 3.015 0.883 1.02 2.221 ...
##  $ CAPACIDADsinURB_ABC    : num  0.852 2.271 8.402 7.163 4.144 ...
##  $ MEDIDAS2_ABC           : num  3.07 3.29 1.06 1.27 2.6 ...

data_regre$`Desempleo (% al 2019)`=as.numeric(data_regre$`Desempleo (% al 2019)`)
names(data_regre)[14]="Contagiados"
data_regre$Contagiados=as.numeric(data_regre$Contagiados)

regresion

QUIERO = formula(Contagiados ~ data_regre$capacidad + data_regre$medidas + data_regre$`Desempleo (% al 2019)` + data_regre$`Densidad (2018)`)
totalidad = glm(QUIERO, family=poisson, data=data_regre) 
summary(totalidad)

## 
## Call:
## glm(formula = QUIERO, family = poisson, data = data_regre)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -364.5  -209.1  -144.8   -30.0  1556.0  
## 
## Coefficients:
##                                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                         1.045e+01  1.698e-03  6154.2   <2e-16 ***
## data_regre$capacidad                1.111e-01  2.059e-04   539.7   <2e-16 ***
## data_regre$medidas                 -2.126e-01  3.192e-04  -666.0   <2e-16 ***
## data_regre$`Desempleo (% al 2019)`  1.650e-03  5.690e-05    29.0   <2e-16 ***
## data_regre$`Densidad (2018)`       -1.158e-04  8.413e-07  -137.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 9737350  on 123  degrees of freedom
## Residual deviance: 8938426  on 119  degrees of freedom
## AIC: 8939719
## 
## Number of Fisher Scoring iterations: 8

exp(confint(totalidad))

## Waiting for profiling to be done...

##                                           2.5 %       97.5 %
## (Intercept)                        3.441116e+04 3.464096e+04
## data_regre$capacidad               1.117072e+00 1.117974e+00
## data_regre$medidas                 8.079677e-01 8.089794e-01
## data_regre$`Desempleo (% al 2019)` 1.001540e+00 1.001763e+00
## data_regre$`Densidad (2018)`       9.998825e-01 9.998858e-01

AER::dispersiontest(totalidad,trafo=1) #significativa

## 
##  Overdispersion test
## 
## data:  totalidad
## z = 1.8402, p-value = 0.03287
## alternative hypothesis: true alpha is greater than 0
## sample estimates:
##    alpha 
## 154143.4

QUE = formula(Contagiados ~ data_regre$CAPACIDADsinURB_ABC+data_regre$MEDIDAS2_ABC+data_regre$`%poburb18`)
totalidad2 = glm(QUE, family=poisson, data=data_regre) 
summary(totalidad2)

## 
## Call:
## glm(formula = QUE, family = poisson, data = data_regre)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -408.20  -207.04  -118.81   -20.33  1274.85  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     8.693e+00  2.573e-03  3378.8   <2e-16 ***
## data_regre$CAPACIDADsinURB_ABC -5.991e-02  2.381e-04  -251.6   <2e-16 ***
## data_regre$MEDIDAS2_ABC        -1.181e-01  3.219e-04  -367.0   <2e-16 ***
## data_regre$`%poburb18`          3.475e-02  3.316e-05  1047.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 9737350  on 123  degrees of freedom
## Residual deviance: 7922074  on 120  degrees of freedom
## AIC: 7923365
## 
## Number of Fisher Scoring iterations: 6

exp(confint(totalidad2))

## Waiting for profiling to be done...

##                                       2.5 %       97.5 %
## (Intercept)                    5928.3644525 5988.4527961
## data_regre$CAPACIDADsinURB_ABC    0.9414135    0.9422926
## data_regre$MEDIDAS2_ABC           0.8880265    0.8891477
## data_regre$`%poburb18`            1.0352921    1.0354267

AER::dispersiontest(totalidad2,trafo=1) #significativa

## 
##  Overdispersion test
## 
## data:  totalidad2
## z = 2.6699, p-value = 0.003794
## alternative hypothesis: true alpha is greater than 0
## sample estimates:
##    alpha 
## 103725.9

#Viendo el valor de Urbana sería mejor verla como cluster?

##5 Usando clusters: s epueden usar en regresion clusters para crear una ordinal

set.seed(123)
# creemos clusters con las variable relacionadas con felicidad
str(data)

## 'data.frame':    124 obs. of  25 variables:
##  $ Country                : chr  "Afghanistan" "Angola" "Andorra" "United Arab Emirates" ...
##  $ HDI                    : num  0.496 0.575 0.857 0.866 0.83 ...
##  $ EXPECTATIVAVIDA        : num  64.5 60.8 81.8 77.8 76.5 ...
##  $ EXPECTCOLE             : num  10.1 11.8 13.3 13.6 17.6 ...
##  $ YEARS_SCHOOLING        : num  3.93 5.13 10.16 10.95 10.56 ...
##  $ GNI_GROSSNATIONALINCOME: num  1746 5555 48641 66912 17611 ...
##  $ infoalawk              : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 1 1 3 3 3 3 3 3 ...
##  $ %poburb18              : num  25.5 65.5 88.1 86.5 91.9 ...
##  $ GS_2017                : num  11.78 2.79 10.32 3.33 9.12 ...
##  $ PPP_2018               : num  524 3290 41793 43839 11684 ...
##  $ StringencyIndex        : num  27.78 33.33 0 2.78 11.11 ...
##  $ Apoyo Economico        : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Densidad (2018)        : num  56.9 24.7 163.8 135.6 16.3 ...
##  $ Desempleo (% al 2019)  : int  24 7 4 2 8 6 6 5 7 1 ...
##  $ Contagd100             : int  16509 259 852 16240 25987 6847 16771 7876 53981 850 ...
##  $ Regulatory_quality     : num  -1.121 -0.894 1.228 0.979 -0.493 ...
##  $ control_co             : num  -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
##  $ Ruleoflaw              : num  -1.714 -1.054 1.58 0.84 -0.431 ...
##  $ Voice_acco             : num  -0.988 -0.777 1.139 -1.122 0.6 ...
##  $ Political_sta          : num  -2.649 -0.311 1.615 0.703 -0.12 ...
##  $ GEE                    : num  -1.457 -1.052 1.945 1.431 0.026 ...
##  $ capacidad              : num  1.03 2.42 8.3 7.26 4.4 ...
##  $ medidas                : num  3.054 3.015 0.883 1.02 2.221 ...
##  $ CAPACIDADsinURB_ABC    : num  0.852 2.271 8.402 7.163 4.144 ...
##  $ MEDIDAS2_ABC           : num  3.07 3.29 1.06 1.27 2.6 ...

dist=daisy(data[8], metric="gower")

library(factoextra)

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

fviz_nbclust(data[8], pam,diss=g.dist,method = "gap_stat",k.max = 10,verbose = F)

res.pam = pam(dist,6,cluster.only = F)

fviz_silhouette(res.pam)

##   cluster size ave.sil.width
## 1       1   16          0.61
## 2       2   21          0.61
## 3       3   24          0.53
## 4       4   26          0.61
## 5       5   19          0.53
## 6       6   18          0.64

data$pamURB=res.pam$clustering

aggregate(data$`%poburb18`~pamURB,data = data,FUN = mean) #primero tengo que ver esten eordenno siempre lo están si no está que reordenar, no convertir en clsuters sin hacerlo...

data$pamURB=recode(data$pamURB, "1=1; 5=2; 4=3; 2=4; 6=5; 3=6", as.factor = T)

Va como ascendente ahora

#como ordinal!
data$pamURB=as.ordered(data$pamURB)
str(data)

## 'data.frame':    124 obs. of  26 variables:
##  $ Country                : chr  "Afghanistan" "Angola" "Andorra" "United Arab Emirates" ...
##  $ HDI                    : num  0.496 0.575 0.857 0.866 0.83 ...
##  $ EXPECTATIVAVIDA        : num  64.5 60.8 81.8 77.8 76.5 ...
##  $ EXPECTCOLE             : num  10.1 11.8 13.3 13.6 17.6 ...
##  $ YEARS_SCHOOLING        : num  3.93 5.13 10.16 10.95 10.56 ...
##  $ GNI_GROSSNATIONALINCOME: num  1746 5555 48641 66912 17611 ...
##  $ infoalawk              : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 1 1 3 3 3 3 3 3 ...
##  $ %poburb18              : num  25.5 65.5 88.1 86.5 91.9 ...
##  $ GS_2017                : num  11.78 2.79 10.32 3.33 9.12 ...
##  $ PPP_2018               : num  524 3290 41793 43839 11684 ...
##  $ StringencyIndex        : num  27.78 33.33 0 2.78 11.11 ...
##  $ Apoyo Economico        : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Densidad (2018)        : num  56.9 24.7 163.8 135.6 16.3 ...
##  $ Desempleo (% al 2019)  : int  24 7 4 2 8 6 6 5 7 1 ...
##  $ Contagd100             : int  16509 259 852 16240 25987 6847 16771 7876 53981 850 ...
##  $ Regulatory_quality     : num  -1.121 -0.894 1.228 0.979 -0.493 ...
##  $ control_co             : num  -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
##  $ Ruleoflaw              : num  -1.714 -1.054 1.58 0.84 -0.431 ...
##  $ Voice_acco             : num  -0.988 -0.777 1.139 -1.122 0.6 ...
##  $ Political_sta          : num  -2.649 -0.311 1.615 0.703 -0.12 ...
##  $ GEE                    : num  -1.457 -1.052 1.945 1.431 0.026 ...
##  $ capacidad              : num  1.03 2.42 8.3 7.26 4.4 ...
##  $ medidas                : num  3.054 3.015 0.883 1.02 2.221 ...
##  $ CAPACIDADsinURB_ABC    : num  0.852 2.271 8.402 7.163 4.144 ...
##  $ MEDIDAS2_ABC           : num  3.07 3.29 1.06 1.27 2.6 ...
##  $ pamURB                 : Ord.factor w/ 6 levels "1"<"2"<"3"<"4"<..: 1 4 6 6 6 6 3 3 6 2 ...

QUEda = formula(Contagiados ~ data$CAPACIDADsinURB_ABC+data$MEDIDAS2_ABC+data$pamURB)
totalidad3 = glm(QUEda, family=poisson, data=data_regre) 
summary(totalidad3)

## 
## Call:
## glm(formula = QUEda, family = poisson, data = data_regre)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -450.04  -162.97  -104.43    29.13  1345.54  
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              10.4626205  0.0018163 5760.32   <2e-16 ***
## data$CAPACIDADsinURB_ABC -0.0577846  0.0002408 -239.94   <2e-16 ***
## data$MEDIDAS2_ABC        -0.0884593  0.0003345 -264.44   <2e-16 ***
## data$pamURB.L             2.5628125  0.0032078  798.92   <2e-16 ***
## data$pamURB.Q            -0.6363585  0.0029150 -218.30   <2e-16 ***
## data$pamURB.C            -0.0602533  0.0022474  -26.81   <2e-16 ***
## data$pamURB^4            -0.9925754  0.0017264 -574.94   <2e-16 ***
## data$pamURB^5             0.0491363  0.0016106   30.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 9737350  on 123  degrees of freedom
## Residual deviance: 6806056  on 116  degrees of freedom
## AIC: 6807355
## 
## Number of Fisher Scoring iterations: 6

exp(confint(totalidad3))

## Waiting for profiling to be done...

##                                 2.5 %       97.5 %
## (Intercept)              3.485875e+04 3.510783e+04
## data$CAPACIDADsinURB_ABC 9.434078e-01 9.442988e-01
## data$MEDIDAS2_ABC        9.147402e-01 9.159406e-01
## data$pamURB.L            1.289104e+01 1.305417e+01
## data$pamURB.Q            5.261973e-01 5.322446e-01
## data$pamURB.C            9.373911e-01 9.456859e-01
## data$pamURB^4            3.693688e-01 3.718770e-01
## data$pamURB^5            1.047053e+00 1.053685e+00

AER::dispersiontest(totalidad3,trafo=1) #significativa

## 
##  Overdispersion test
## 
## data:  totalidad3
## z = 2.0065, p-value = 0.0224
## alternative hypothesis: true alpha is greater than 0
## sample estimates:
##    alpha 
## 92310.97

#LINEALIDAD
plot(totalidad3, 1, main = c("Gráfico 2: Linealidad"))  #diagonal, casi lineal

B. Homocedasticidad.

plot(totalidad3, 3, main = c("Gráfico 3: Homocedasticidad"))#diagonal

bptest(totalidad3) #valor P mayor a 0.05 Homocedasticidad

## 
##  studentized Breusch-Pagan test
## 
## data:  totalidad3
## BP = 7.2904, df = 7, p-value = 0.3993

c. Normalidad de residuos. Puntos cerca de la diagonal.

plot(totalidad3, 2, main = c("Gráfico 4: Normalidad de residuos")) #se alejan de diagonal

shapiro.test(totalidad3$residuals) #menor a 0.05 el valor P entonces indica que no hay normaldiad de residusos

## 
##  Shapiro-Wilk normality test
## 
## data:  totalidad3$residuals
## W = 0.57632, p-value < 2.2e-16

library(DescTools)

## 
## Attaching package: 'DescTools'

## The following object is masked from 'package:car':
## 
##     Recode

## The following object is masked from 'package:BBmisc':
## 
##     %nin%

## The following objects are masked from 'package:psych':
## 
##     AUC, ICC, SD

VIF(totalidad3)

##                              GVIF Df GVIF^(1/(2*Df))
## data$CAPACIDADsinURB_ABC 1.311125  1        1.145044
## data$MEDIDAS2_ABC        1.062234  1        1.030647
## data$pamURB              1.356730  5        1.030978

5.2 ver valores influyentes Prestar atención al indice de Cook.

plot(totalidad3, 5, main = c("Gráfico 5: Identificación de valores influyentes"))

checktotalidad3=as.data.frame(influence.measures(totalidad3)$is.inf)

checktotalidad3[checktotalidad3$cook.d | checktotalidad3$hat,] #22 y 73

R Notebook

regresión posison

regresion