#RMD resumen sin la limpieza o el analisis bivariado #Data
library(rio)
l="https://github.com/AriannaNKZC/Estad-2/raw/master/avecita.csv"
data=import(l)
#analisis exploratorio (EFA)
#librerias
library(htmltab)
library(stringr)
library(polycor)
library(ggcorrplot)
## Loading required package: ggplot2
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:polycor':
##
## polyserial
library(matrixcalc)
library(GPArotation)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:rio':
##
## export
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(fpc)
library(cluster)
library(dbscan)
##
## Attaching package: 'dbscan'
## The following object is masked from 'package:fpc':
##
## dbscan
library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following object is masked from 'package:base':
##
## isFALSE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:BBmisc':
##
## coalesce, collapse
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#Regresion poisson Renombrando variables
DataR=data
names(DataR)[15]="Contagiados"
str(DataR)
## 'data.frame': 124 obs. of 21 variables:
## $ Country : chr "Afghanistan" "Angola" "Andorra" "United Arab Emirates" ...
## $ HDI : num 0.496 0.575 0.857 0.866 0.83 ...
## $ EXPECTATIVAVIDA : num 64.5 60.8 81.8 77.8 76.5 ...
## $ EXPECTCOLE : num 10.1 11.8 13.3 13.6 17.6 ...
## $ YEARS_SCHOOLING : num 3.93 5.13 10.16 10.95 10.56 ...
## $ GNI_GROSSNATIONALINCOME: num 1746 5555 48641 66912 17611 ...
## $ infoalawk : int 2 1 0 0 2 2 2 2 2 2 ...
## $ %poburb18 : num 25.5 65.5 88.1 86.5 91.9 ...
## $ GS_2017 : num 11.78 2.79 10.32 3.33 9.12 ...
## $ PPP_2018 : num 524 3290 41793 43839 11684 ...
## $ StringencyIndex : num 27.78 33.33 0 2.78 11.11 ...
## $ Apoyo Economico : chr "Sin apoyo" "Sin apoyo" "Sin apoyo" "Sin apoyo" ...
## $ Densidad (2018) : num 56.9 24.7 163.8 135.6 16.3 ...
## $ Desempleo (% al 2019) : int 24 7 4 2 8 6 6 5 7 1 ...
## $ Contagiados : int 16509 259 852 16240 25987 6847 16771 7876 53981 850 ...
## $ Regulatory_quality : num -1.121 -0.894 1.228 0.979 -0.493 ...
## $ control_co : num -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
## $ Ruleoflaw : num -1.714 -1.054 1.58 0.84 -0.431 ...
## $ Voice_acco : num -0.988 -0.777 1.139 -1.122 0.6 ...
## $ Political_sta : num -2.649 -0.311 1.615 0.703 -0.12 ...
## $ GEE : num -1.457 -1.052 1.945 1.431 0.026 ...
DataR$Country=NULL
DataR$`Apoyo Economico` = as.factor(DataR$`Apoyo Economico`)
levels(DataR$`Apoyo Economico`) <- c("Sin apoyo", "Menos del 50% del sueldo", "Más del 50% del sueldo")
DataR$`Apoyo Economico` = as.ordered(DataR$`Apoyo Economico`)
table(DataR$`Apoyo Economico`)
##
## Sin apoyo Menos del 50% del sueldo
## 7 117
DataR$infoalawk=as.ordered(DataR$infoalawk)
DataR$`Desempleo (% al 2019)`=as.numeric(DataR$`Desempleo (% al 2019)`)
DataR$Contagiados=as.numeric(DataR$Contagiados)
DataR$HDI=NULL
Regresion en si
Contagiados=glm(Contagiados~.,family = poisson, data=DataR)
summary(Contagiados)
##
## Call:
## glm(formula = Contagiados ~ ., family = poisson, data = DataR)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -431.56 -147.23 -67.36 42.61 813.02
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.716e-01 1.395e-02 55.31 <2e-16 ***
## EXPECTATIVAVIDA 1.017e-01 2.107e-04 482.75 <2e-16 ***
## EXPECTCOLE 1.081e-01 3.863e-04 279.77 <2e-16 ***
## YEARS_SCHOOLING -2.049e-01 4.064e-04 -504.15 <2e-16 ***
## GNI_GROSSNATIONALINCOME 3.971e-05 8.348e-08 475.62 <2e-16 ***
## infoalawk.L 6.185e-01 1.424e-03 434.33 <2e-16 ***
## infoalawk.Q 1.953e-01 1.545e-03 126.41 <2e-16 ***
## `%poburb18` 1.343e-02 4.448e-05 302.02 <2e-16 ***
## GS_2017 1.119e-01 3.476e-04 321.84 <2e-16 ***
## PPP_2018 -4.794e-05 9.399e-08 -510.10 <2e-16 ***
## StringencyIndex -2.466e-02 4.722e-05 -522.17 <2e-16 ***
## `Apoyo Economico`.L 8.868e-01 4.637e-03 191.22 <2e-16 ***
## `Densidad (2018)` -2.554e-04 1.198e-06 -213.22 <2e-16 ***
## `Desempleo (% al 2019)` -1.394e-02 9.203e-05 -151.49 <2e-16 ***
## Regulatory_quality 1.396e+00 2.400e-03 581.65 <2e-16 ***
## control_co 3.619e-01 2.505e-03 144.49 <2e-16 ***
## Ruleoflaw -5.516e-01 3.039e-03 -181.53 <2e-16 ***
## Voice_acco 8.178e-02 1.494e-03 54.73 <2e-16 ***
## Political_sta -7.616e-01 1.186e-03 -641.92 <2e-16 ***
## GEE -7.715e-01 2.987e-03 -258.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 9737350 on 123 degrees of freedom
## Residual deviance: 4987598 on 104 degrees of freedom
## AIC: 4988921
##
## Number of Fisher Scoring iterations: 7
exp(confint(Contagiados))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 2.1047857 2.2230810
## EXPECTATIVAVIDA 1.1066131 1.1075275
## EXPECTCOLE 1.1132745 1.1149613
## YEARS_SCHOOLING 0.8140994 0.8153973
## GNI_GROSSNATIONALINCOME 1.0000395 1.0000399
## infoalawk.L 1.8508981 1.8612584
## infoalawk.Q 1.2120421 1.2194059
## `%poburb18` 1.0134357 1.0136124
## GS_2017 1.1176020 1.1191258
## PPP_2018 0.9999519 0.9999522
## StringencyIndex 0.9755522 0.9757328
## `Apoyo Economico`.L 2.4053523 2.4494777
## `Densidad (2018)` 0.9997422 0.9997469
## `Desempleo (% al 2019)` 0.9859772 0.9863330
## Regulatory_quality 4.0188266 4.0568060
## control_co 1.4290591 1.4431605
## Ruleoflaw 0.5725934 0.5794548
## Voice_acco 1.0820453 1.0884015
## Political_sta 0.4658261 0.4679976
## GEE 0.4596397 0.4650523
library(AER)
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:psych':
##
## logit
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
AER::dispersiontest(Contagiados,trafo=1) #significativa
##
## Overdispersion test
##
## data: Contagiados
## z = 3.9754, p-value = 3.514e-05
## alternative hypothesis: true alpha is greater than 0
## sample estimates:
## alpha
## 55084.63
Calculemos matriz de correlación:
theData = data
names(data)
## [1] "Country" "HDI"
## [3] "EXPECTATIVAVIDA" "EXPECTCOLE"
## [5] "YEARS_SCHOOLING" "GNI_GROSSNATIONALINCOME"
## [7] "infoalawk" "%poburb18"
## [9] "GS_2017" "PPP_2018"
## [11] "StringencyIndex" "Apoyo Economico"
## [13] "Densidad (2018)" "Desempleo (% al 2019)"
## [15] "Contagd100" "Regulatory_quality"
## [17] "control_co" "Ruleoflaw"
## [19] "Voice_acco" "Political_sta"
## [21] "GEE"
data$PAIS = NULL
theData = (data[, c(2, 7:14, 16:21)])
theData$`Apoyo Economico` = as.factor(theData$`Apoyo Economico`)
theData$`Apoyo Economico` = as.numeric(theData$`Apoyo Economico`)
theData$`Apoyo Economico`=recode(theData$`Apoyo Economico`, "1=2; 2=1", as.factor = F)
table(theData$`Apoyo Economico`)
##
## 1 2
## 117 7
theData$infoalawk = as.numeric(theData$infoalawk)
table(theData$infoalawk)
##
## 0 1 2
## 18 19 87
str(theData)
## 'data.frame': 124 obs. of 15 variables:
## $ HDI : num 0.496 0.575 0.857 0.866 0.83 ...
## $ infoalawk : num 2 1 0 0 2 2 2 2 2 2 ...
## $ %poburb18 : num 25.5 65.5 88.1 86.5 91.9 ...
## $ GS_2017 : num 11.78 2.79 10.32 3.33 9.12 ...
## $ PPP_2018 : num 524 3290 41793 43839 11684 ...
## $ StringencyIndex : num 27.78 33.33 0 2.78 11.11 ...
## $ Apoyo Economico : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Densidad (2018) : num 56.9 24.7 163.8 135.6 16.3 ...
## $ Desempleo (% al 2019): int 24 7 4 2 8 6 6 5 7 1 ...
## $ Regulatory_quality : num -1.121 -0.894 1.228 0.979 -0.493 ...
## $ control_co : num -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
## $ Ruleoflaw : num -1.714 -1.054 1.58 0.84 -0.431 ...
## $ Voice_acco : num -0.988 -0.777 1.139 -1.122 0.6 ...
## $ Political_sta : num -2.649 -0.311 1.615 0.703 -0.12 ...
## $ GEE : num -1.457 -1.052 1.945 1.431 0.026 ...
#cambiando a nombres más bonitos
names(theData) = c("IDH", "Campañas informativas", "Población urbana (% al 2018)", "Gasto en Salud (2017)", "PBI per cápita (2018)", "Indice de Rigurosidad", "Apoyo Economico", "Densidad (2018)", "Desempleo (% al 2019)","RegulatoryQual","control_co", "ruleOflaw", "VoiceAccoutnability","EstabilidadPolitica","GEE")
corMatrix=polycor::hetcor(theData)$correlations
Explorar correlaciones:
ggcorrplot(corMatrix)
#evaluandos ignificancia
ggcorrplot(corMatrix,
p.mat = cor_pmat(corMatrix),
insig = "blank",
title = "Gráfico 1: Matriz de correlación")
psych::KMO(corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA = 0.88
## MSA for each item =
## IDH Campañas informativas
## 0.86 0.43
## Población urbana (% al 2018) Gasto en Salud (2017)
## 0.87 0.90
## PBI per cápita (2018) Indice de Rigurosidad
## 0.96 0.74
## Apoyo Economico Densidad (2018)
## 0.38 0.62
## Desempleo (% al 2019) RegulatoryQual
## 0.80 0.89
## control_co ruleOflaw
## 0.89 0.92
## VoiceAccoutnability EstabilidadPolitica
## 0.87 0.90
## GEE
## 0.90
cortest.bartlett(corMatrix,n=nrow(theData))$p.value>0.05
## [1] FALSE
library(matrixcalc)
is.singular.matrix(corMatrix)
## [1] FALSE
fa.parallel(theData, fm = 'ML', fa = 'fa')
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
#instalar 'parameters' y 'n_factors'
library(parameters)
library(nFactors)
## Loading required package: lattice
##
## Attaching package: 'nFactors'
## The following object is masked from 'package:lattice':
##
## parallel
library(see)
sugerencia=parameters::n_factors(corMatrix)
#instalar 'see'
# tenemos:
plot(sugerencia)
6. Redimensionar a numero menor de factores * Resultado inicial:
resfa <- fa(theData,nfactors = 2,cor = 'mixed',rotate ="varimax",fm="minres")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## mixed.cor is deprecated, please use mixedCor.
print(resfa$loadings,cutoff = 0.5)
##
## Loadings:
## MR1 MR2
## IDH 0.827
## Campañas informativas
## Población urbana (% al 2018) 0.598
## Gasto en Salud (2017)
## PBI per cápita (2018) 0.819
## Indice de Rigurosidad 0.973
## Apoyo Economico
## Densidad (2018)
## Desempleo (% al 2019)
## RegulatoryQual 0.942
## control_co 0.941
## ruleOflaw 0.957
## VoiceAccoutnability 0.771
## EstabilidadPolitica 0.816
## GEE 0.942
##
## MR1 MR2
## SS loadings 6.860 1.741
## Proportion Var 0.457 0.116
## Cumulative Var 0.457 0.573
fa.diagram(resfa, main = c("Gráfico 2: Árbol de factorización del primer modelo"))
Evaluando Resultado obtenido: ¿La Raíz del error cuadrático medio corregida está cerca a cero?
resfa$crms #si
## [1] 0.06416853
¿La Raíz del error cuadrático medio de aproximación es menor a 0.05?
resfa$RMSEA #no pasa
## RMSEA lower upper confidence
## 0.1475114 0.1300374 0.1670539 0.9000000
¿El índice de Tucker-Lewis es mayor a 0.9?
resfa$TLI #0.82
## [1] 0.8261077
¿Qué variables aportaron mas a los factores?
sort(resfa$communality)
## Densidad (2018) Desempleo (% al 2019)
## 0.02764488 0.07529211
## Gasto en Salud (2017) Campañas informativas
## 0.18374782 0.23984585
## Apoyo Economico Población urbana (% al 2018)
## 0.24646787 0.41566714
## VoiceAccoutnability EstabilidadPolitica
## 0.59675656 0.66828487
## PBI per cápita (2018) IDH
## 0.71870553 0.73985075
## control_co RegulatoryQual
## 0.89421306 0.90861209
## GEE ruleOflaw
## 0.94065178 0.94474044
## Indice de Rigurosidad
## 1.00064695
¿Qué variables contribuyen a mas de un factor? #conviene que salga 1
sort(resfa$complexity) #desempleo
## Apoyo Economico EstabilidadPolitica
## 1.002875 1.006240
## VoiceAccoutnability Campañas informativas
## 1.008069 1.018266
## control_co Gasto en Salud (2017)
## 1.020791 1.025106
## RegulatoryQual ruleOflaw
## 1.048192 1.061428
## Indice de Rigurosidad GEE
## 1.114654 1.121439
## PBI per cápita (2018) IDH
## 1.140260 1.162113
## Densidad (2018) Población urbana (% al 2018)
## 1.162977 1.316069
## Desempleo (% al 2019)
## 1.891938
factorial_casos<-as.data.frame(resfa$scores)
head(factorial_casos)
summary(factorial_casos)
## MR1 MR2
## Min. :-2.1983 Min. :-1.9845
## 1st Qu.:-0.7568 1st Qu.:-0.6991
## Median :-0.1731 Median :-0.2371
## Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.7598 3rd Qu.: 0.6143
## Max. : 1.9421 Max. : 3.2857
#Alternativa 2 Calculemos matriz de correlación:
laData = data
laData = (data[, c(2, 7:14, 16:21)])
names(laData)
## [1] "HDI" "infoalawk" "%poburb18"
## [4] "GS_2017" "PPP_2018" "StringencyIndex"
## [7] "Apoyo Economico" "Densidad (2018)" "Desempleo (% al 2019)"
## [10] "Regulatory_quality" "control_co" "Ruleoflaw"
## [13] "Voice_acco" "Political_sta" "GEE"
laData$`Apoyo Economico` = as.factor(laData$`Apoyo Economico`)
laData$`Apoyo Economico` = as.numeric(laData$`Apoyo Economico`)
laData$`Apoyo Economico`=recode(laData$`Apoyo Economico`, "1=2; 2=1", as.factor = F)
laData$infoalawk = as.numeric(laData$infoalawk)
table(laData$`Apoyo Economico`)
##
## 1 2
## 117 7
str(laData)
## 'data.frame': 124 obs. of 15 variables:
## $ HDI : num 0.496 0.575 0.857 0.866 0.83 ...
## $ infoalawk : num 2 1 0 0 2 2 2 2 2 2 ...
## $ %poburb18 : num 25.5 65.5 88.1 86.5 91.9 ...
## $ GS_2017 : num 11.78 2.79 10.32 3.33 9.12 ...
## $ PPP_2018 : num 524 3290 41793 43839 11684 ...
## $ StringencyIndex : num 27.78 33.33 0 2.78 11.11 ...
## $ Apoyo Economico : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Densidad (2018) : num 56.9 24.7 163.8 135.6 16.3 ...
## $ Desempleo (% al 2019): int 24 7 4 2 8 6 6 5 7 1 ...
## $ Regulatory_quality : num -1.121 -0.894 1.228 0.979 -0.493 ...
## $ control_co : num -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
## $ Ruleoflaw : num -1.714 -1.054 1.58 0.84 -0.431 ...
## $ Voice_acco : num -0.988 -0.777 1.139 -1.122 0.6 ...
## $ Political_sta : num -2.649 -0.311 1.615 0.703 -0.12 ...
## $ GEE : num -1.457 -1.052 1.945 1.431 0.026 ...
#trabajando poburabano
laData$`%poburb18` = NULL
corMatrixw=polycor::hetcor(laData)$correlations
Explorar correlaciones:
ggcorrplot(corMatrixw)
#evaluandos ignificancia
ggcorrplot(corMatrixw,
p.mat = cor_pmat(corMatrixw),
insig = "blank",
title = "Gráfico 5: Matriz de correlación")
psych::KMO(corMatrixw)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrixw)
## Overall MSA = 0.88
## MSA for each item =
## HDI infoalawk GS_2017
## 0.90 0.44 0.88
## PPP_2018 StringencyIndex Apoyo Economico
## 0.95 0.72 0.40
## Densidad (2018) Desempleo (% al 2019) Regulatory_quality
## 0.61 0.81 0.88
## control_co Ruleoflaw Voice_acco
## 0.90 0.91 0.86
## Political_sta GEE
## 0.89 0.89
cortest.bartlett(corMatrixw,n=nrow(laData))$p.value>0.05
## [1] FALSE
is.singular.matrix(corMatrixw)
## [1] FALSE
fa.parallel(laData, fm = 'ML', fa = 'fa')
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
#instalar 'parameters' y 'n_factors'
sugerencia1=parameters::n_factors(corMatrixw)
#instalar 'see'
# tenemos:
plot(sugerencia1)
6. Redimensionar a numero menor de factores * Resultado inicial:
resfa_2 <- fa(laData,nfactors = 2,cor = 'mixed',rotate ="varimax",fm="minres")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## mixed.cor is deprecated, please use mixedCor.
print(resfa_2$loadings,cutoff = 0.5)
##
## Loadings:
## MR1 MR2
## HDI 0.807
## infoalawk
## GS_2017
## PPP_2018 0.813
## StringencyIndex 0.977
## Apoyo Economico
## Densidad (2018)
## Desempleo (% al 2019)
## Regulatory_quality 0.943
## control_co 0.946
## Ruleoflaw 0.964
## Voice_acco 0.777
## Political_sta 0.819
## GEE 0.944
##
## MR1 MR2
## SS loadings 6.498 1.690
## Proportion Var 0.464 0.121
## Cumulative Var 0.464 0.585
fa.diagram(resfa_2, main = c("Gráfico 2: Árbol de factorización del primer modelo"))
Evaluando Resultado obtenido: ¿La Raíz del error cuadrático medio corregida está cerca a cero?
resfa_2$crms #si
## [1] 0.06244545
¿La Raíz del error cuadrático medio de aproximación es menor a 0.05?
resfa_2$RMSEA #no
## RMSEA lower upper confidence
## 0.1385199 0.1191850 0.1599397 0.9000000
¿El índice de Tucker-Lewis es mayor a 0.9?
resfa_2$TLI #0.85
## [1] 0.8590452
¿Qué variables aportaron mas a los factores?
sort(resfa_2$communality)
## Densidad (2018) Desempleo (% al 2019) GS_2017
## 0.02614604 0.07134946 0.18243492
## infoalawk Apoyo Economico Voice_acco
## 0.23932166 0.25013174 0.60546233
## Political_sta HDI PPP_2018
## 0.67232630 0.69956305 0.70735966
## control_co Regulatory_quality GEE
## 0.90542534 0.91104046 0.94746452
## Ruleoflaw StringencyIndex
## 0.95960930 1.01025090
¿Qué variables contribuyen a mas de un factor? #conviene que salga 1
sort(resfa_2$complexity)
## Apoyo Economico Political_sta Voice_acco
## 1.002896 1.005226 1.005356
## infoalawk control_co GS_2017
## 1.017275 1.023719 1.026774
## Regulatory_quality Ruleoflaw StringencyIndex
## 1.050215 1.066870 1.114701
## GEE PPP_2018 Densidad (2018)
## 1.125615 1.137687 1.138584
## HDI Desempleo (% al 2019)
## 1.145359 1.874363
factorial_casos2<-as.data.frame(resfa_2$scores) #en esta no me sale el factorial
head(factorial_casos2)
summary(factorial_casos2)
## MR1 MR2
## Min. :-2.1445 Min. :-2.1021
## 1st Qu.:-0.7387 1st Qu.:-0.7215
## Median :-0.1786 Median :-0.2346
## Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.7666 3rd Qu.: 0.6181
## Max. : 1.9603 Max. : 3.3280
#Mergear factoriales
ABCD=cbind(data[1],as.data.frame(resfa$scores))
data$capacidad=normalize(ABCD$MR1,
method = "range",
margin=2, # by column
range = c(0, 10))
data$medidas=normalize(ABCD$MR2,
method = "range",
margin=2, # by column
range = c(0, 10))
CCMA=cbind(data[1],as.data.frame(resfa_2$scores))
data$CAPACIDADsinURB_ABC= normalize(CCMA$MR1,
method = "range",
margin=2, # by column
range = c(0, 10))
data$MEDIDAS2_ABC=normalize(CCMA$MR2,
method = "range",
margin=2, # by column
range = c(0, 10))
library(ggpubr) #gráfico para ver normalidad
library(scatterplot3d)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(lmtest)
Sin utilizar factoriales
#infoawk (o como se escriba)
str(data$infoalawk)
## int [1:124] 2 1 0 0 2 2 2 2 2 2 ...
data$infoalawk = as.ordered(data$infoalawk)
levels(data$infoalawk) = c("Ninguna", "Campañas del gobierno", "Campañas integrales")
table(data$infoalawk)
##
## Ninguna Campañas del gobierno Campañas integrales
## 18 19 87
#apoyo economico
data$`Apoyo Economico` = as.factor(data$`Apoyo Economico`)
levels(data$`Apoyo Economico`) <- c("Sin apoyo", "Menos del 50% del sueldo", "Más del 50% del sueldo")
data$`Apoyo Economico` = as.ordered(data$`Apoyo Economico`)
table(data$`Apoyo Economico`)
##
## Sin apoyo Menos del 50% del sueldo
## 7 117
names(data)
## [1] "Country" "HDI"
## [3] "EXPECTATIVAVIDA" "EXPECTCOLE"
## [5] "YEARS_SCHOOLING" "GNI_GROSSNATIONALINCOME"
## [7] "infoalawk" "%poburb18"
## [9] "GS_2017" "PPP_2018"
## [11] "StringencyIndex" "Apoyo Economico"
## [13] "Densidad (2018)" "Desempleo (% al 2019)"
## [15] "Contagd100" "Regulatory_quality"
## [17] "control_co" "Ruleoflaw"
## [19] "Voice_acco" "Political_sta"
## [21] "GEE" "capacidad"
## [23] "medidas" "CAPACIDADsinURB_ABC"
## [25] "MEDIDAS2_ABC"
data_regre = data
data_regre$Country = NULL
str(data_regre)
## 'data.frame': 124 obs. of 24 variables:
## $ HDI : num 0.496 0.575 0.857 0.866 0.83 ...
## $ EXPECTATIVAVIDA : num 64.5 60.8 81.8 77.8 76.5 ...
## $ EXPECTCOLE : num 10.1 11.8 13.3 13.6 17.6 ...
## $ YEARS_SCHOOLING : num 3.93 5.13 10.16 10.95 10.56 ...
## $ GNI_GROSSNATIONALINCOME: num 1746 5555 48641 66912 17611 ...
## $ infoalawk : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 1 1 3 3 3 3 3 3 ...
## $ %poburb18 : num 25.5 65.5 88.1 86.5 91.9 ...
## $ GS_2017 : num 11.78 2.79 10.32 3.33 9.12 ...
## $ PPP_2018 : num 524 3290 41793 43839 11684 ...
## $ StringencyIndex : num 27.78 33.33 0 2.78 11.11 ...
## $ Apoyo Economico : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 2 2 2 2 2 2 2 2 2 2 ...
## $ Densidad (2018) : num 56.9 24.7 163.8 135.6 16.3 ...
## $ Desempleo (% al 2019) : int 24 7 4 2 8 6 6 5 7 1 ...
## $ Contagd100 : int 16509 259 852 16240 25987 6847 16771 7876 53981 850 ...
## $ Regulatory_quality : num -1.121 -0.894 1.228 0.979 -0.493 ...
## $ control_co : num -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
## $ Ruleoflaw : num -1.714 -1.054 1.58 0.84 -0.431 ...
## $ Voice_acco : num -0.988 -0.777 1.139 -1.122 0.6 ...
## $ Political_sta : num -2.649 -0.311 1.615 0.703 -0.12 ...
## $ GEE : num -1.457 -1.052 1.945 1.431 0.026 ...
## $ capacidad : num 1.03 2.42 8.3 7.26 4.4 ...
## $ medidas : num 3.054 3.015 0.883 1.02 2.221 ...
## $ CAPACIDADsinURB_ABC : num 0.852 2.271 8.402 7.163 4.144 ...
## $ MEDIDAS2_ABC : num 3.07 3.29 1.06 1.27 2.6 ...
data_regre$`Desempleo (% al 2019)`=as.numeric(data_regre$`Desempleo (% al 2019)`)
names(data_regre)[14]="Contagiados"
data_regre$Contagiados=as.numeric(data_regre$Contagiados)
QUIERO = formula(Contagiados ~ data_regre$capacidad + data_regre$medidas + data_regre$`Desempleo (% al 2019)` + data_regre$`Densidad (2018)`)
totalidad = glm(QUIERO, family=poisson, data=data_regre)
summary(totalidad)
##
## Call:
## glm(formula = QUIERO, family = poisson, data = data_regre)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -364.5 -209.1 -144.8 -30.0 1556.0
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.045e+01 1.698e-03 6154.2 <2e-16 ***
## data_regre$capacidad 1.111e-01 2.059e-04 539.7 <2e-16 ***
## data_regre$medidas -2.126e-01 3.192e-04 -666.0 <2e-16 ***
## data_regre$`Desempleo (% al 2019)` 1.650e-03 5.690e-05 29.0 <2e-16 ***
## data_regre$`Densidad (2018)` -1.158e-04 8.413e-07 -137.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 9737350 on 123 degrees of freedom
## Residual deviance: 8938426 on 119 degrees of freedom
## AIC: 8939719
##
## Number of Fisher Scoring iterations: 8
exp(confint(totalidad))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 3.441116e+04 3.464096e+04
## data_regre$capacidad 1.117072e+00 1.117974e+00
## data_regre$medidas 8.079677e-01 8.089794e-01
## data_regre$`Desempleo (% al 2019)` 1.001540e+00 1.001763e+00
## data_regre$`Densidad (2018)` 9.998825e-01 9.998858e-01
AER::dispersiontest(totalidad,trafo=1) #significativa
##
## Overdispersion test
##
## data: totalidad
## z = 1.8402, p-value = 0.03287
## alternative hypothesis: true alpha is greater than 0
## sample estimates:
## alpha
## 154143.4
QUE = formula(Contagiados ~ data_regre$CAPACIDADsinURB_ABC+data_regre$MEDIDAS2_ABC+data_regre$`%poburb18`)
totalidad2 = glm(QUE, family=poisson, data=data_regre)
summary(totalidad2)
##
## Call:
## glm(formula = QUE, family = poisson, data = data_regre)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -408.20 -207.04 -118.81 -20.33 1274.85
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.693e+00 2.573e-03 3378.8 <2e-16 ***
## data_regre$CAPACIDADsinURB_ABC -5.991e-02 2.381e-04 -251.6 <2e-16 ***
## data_regre$MEDIDAS2_ABC -1.181e-01 3.219e-04 -367.0 <2e-16 ***
## data_regre$`%poburb18` 3.475e-02 3.316e-05 1047.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 9737350 on 123 degrees of freedom
## Residual deviance: 7922074 on 120 degrees of freedom
## AIC: 7923365
##
## Number of Fisher Scoring iterations: 6
exp(confint(totalidad2))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 5928.3644525 5988.4527961
## data_regre$CAPACIDADsinURB_ABC 0.9414135 0.9422926
## data_regre$MEDIDAS2_ABC 0.8880265 0.8891477
## data_regre$`%poburb18` 1.0352921 1.0354267
AER::dispersiontest(totalidad2,trafo=1) #significativa
##
## Overdispersion test
##
## data: totalidad2
## z = 2.6699, p-value = 0.003794
## alternative hypothesis: true alpha is greater than 0
## sample estimates:
## alpha
## 103725.9
#Viendo el valor de Urbana sería mejor verla como cluster?
##5 Usando clusters: s epueden usar en regresion clusters para crear una ordinal
set.seed(123)
# creemos clusters con las variable relacionadas con felicidad
str(data)
## 'data.frame': 124 obs. of 25 variables:
## $ Country : chr "Afghanistan" "Angola" "Andorra" "United Arab Emirates" ...
## $ HDI : num 0.496 0.575 0.857 0.866 0.83 ...
## $ EXPECTATIVAVIDA : num 64.5 60.8 81.8 77.8 76.5 ...
## $ EXPECTCOLE : num 10.1 11.8 13.3 13.6 17.6 ...
## $ YEARS_SCHOOLING : num 3.93 5.13 10.16 10.95 10.56 ...
## $ GNI_GROSSNATIONALINCOME: num 1746 5555 48641 66912 17611 ...
## $ infoalawk : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 1 1 3 3 3 3 3 3 ...
## $ %poburb18 : num 25.5 65.5 88.1 86.5 91.9 ...
## $ GS_2017 : num 11.78 2.79 10.32 3.33 9.12 ...
## $ PPP_2018 : num 524 3290 41793 43839 11684 ...
## $ StringencyIndex : num 27.78 33.33 0 2.78 11.11 ...
## $ Apoyo Economico : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 2 2 2 2 2 2 2 2 2 2 ...
## $ Densidad (2018) : num 56.9 24.7 163.8 135.6 16.3 ...
## $ Desempleo (% al 2019) : int 24 7 4 2 8 6 6 5 7 1 ...
## $ Contagd100 : int 16509 259 852 16240 25987 6847 16771 7876 53981 850 ...
## $ Regulatory_quality : num -1.121 -0.894 1.228 0.979 -0.493 ...
## $ control_co : num -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
## $ Ruleoflaw : num -1.714 -1.054 1.58 0.84 -0.431 ...
## $ Voice_acco : num -0.988 -0.777 1.139 -1.122 0.6 ...
## $ Political_sta : num -2.649 -0.311 1.615 0.703 -0.12 ...
## $ GEE : num -1.457 -1.052 1.945 1.431 0.026 ...
## $ capacidad : num 1.03 2.42 8.3 7.26 4.4 ...
## $ medidas : num 3.054 3.015 0.883 1.02 2.221 ...
## $ CAPACIDADsinURB_ABC : num 0.852 2.271 8.402 7.163 4.144 ...
## $ MEDIDAS2_ABC : num 3.07 3.29 1.06 1.27 2.6 ...
dist=daisy(data[8], metric="gower")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_nbclust(data[8], pam,diss=g.dist,method = "gap_stat",k.max = 10,verbose = F)
res.pam = pam(dist,6,cluster.only = F)
fviz_silhouette(res.pam)
## cluster size ave.sil.width
## 1 1 16 0.61
## 2 2 21 0.61
## 3 3 24 0.53
## 4 4 26 0.61
## 5 5 19 0.53
## 6 6 18 0.64
data$pamURB=res.pam$clustering
aggregate(data$`%poburb18`~pamURB,data = data,FUN = mean) #primero tengo que ver esten eordenno siempre lo están si no está que reordenar, no convertir en clsuters sin hacerlo...
data$pamURB=recode(data$pamURB, "1=1; 5=2; 4=3; 2=4; 6=5; 3=6", as.factor = T)
Va como ascendente ahora
#como ordinal!
data$pamURB=as.ordered(data$pamURB)
str(data)
## 'data.frame': 124 obs. of 26 variables:
## $ Country : chr "Afghanistan" "Angola" "Andorra" "United Arab Emirates" ...
## $ HDI : num 0.496 0.575 0.857 0.866 0.83 ...
## $ EXPECTATIVAVIDA : num 64.5 60.8 81.8 77.8 76.5 ...
## $ EXPECTCOLE : num 10.1 11.8 13.3 13.6 17.6 ...
## $ YEARS_SCHOOLING : num 3.93 5.13 10.16 10.95 10.56 ...
## $ GNI_GROSSNATIONALINCOME: num 1746 5555 48641 66912 17611 ...
## $ infoalawk : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 1 1 3 3 3 3 3 3 ...
## $ %poburb18 : num 25.5 65.5 88.1 86.5 91.9 ...
## $ GS_2017 : num 11.78 2.79 10.32 3.33 9.12 ...
## $ PPP_2018 : num 524 3290 41793 43839 11684 ...
## $ StringencyIndex : num 27.78 33.33 0 2.78 11.11 ...
## $ Apoyo Economico : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 2 2 2 2 2 2 2 2 2 2 ...
## $ Densidad (2018) : num 56.9 24.7 163.8 135.6 16.3 ...
## $ Desempleo (% al 2019) : int 24 7 4 2 8 6 6 5 7 1 ...
## $ Contagd100 : int 16509 259 852 16240 25987 6847 16771 7876 53981 850 ...
## $ Regulatory_quality : num -1.121 -0.894 1.228 0.979 -0.493 ...
## $ control_co : num -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
## $ Ruleoflaw : num -1.714 -1.054 1.58 0.84 -0.431 ...
## $ Voice_acco : num -0.988 -0.777 1.139 -1.122 0.6 ...
## $ Political_sta : num -2.649 -0.311 1.615 0.703 -0.12 ...
## $ GEE : num -1.457 -1.052 1.945 1.431 0.026 ...
## $ capacidad : num 1.03 2.42 8.3 7.26 4.4 ...
## $ medidas : num 3.054 3.015 0.883 1.02 2.221 ...
## $ CAPACIDADsinURB_ABC : num 0.852 2.271 8.402 7.163 4.144 ...
## $ MEDIDAS2_ABC : num 3.07 3.29 1.06 1.27 2.6 ...
## $ pamURB : Ord.factor w/ 6 levels "1"<"2"<"3"<"4"<..: 1 4 6 6 6 6 3 3 6 2 ...
QUEda = formula(Contagiados ~ data$CAPACIDADsinURB_ABC+data$MEDIDAS2_ABC+data$pamURB)
totalidad3 = glm(QUEda, family=poisson, data=data_regre)
summary(totalidad3)
##
## Call:
## glm(formula = QUEda, family = poisson, data = data_regre)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -450.04 -162.97 -104.43 29.13 1345.54
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 10.4626205 0.0018163 5760.32 <2e-16 ***
## data$CAPACIDADsinURB_ABC -0.0577846 0.0002408 -239.94 <2e-16 ***
## data$MEDIDAS2_ABC -0.0884593 0.0003345 -264.44 <2e-16 ***
## data$pamURB.L 2.5628125 0.0032078 798.92 <2e-16 ***
## data$pamURB.Q -0.6363585 0.0029150 -218.30 <2e-16 ***
## data$pamURB.C -0.0602533 0.0022474 -26.81 <2e-16 ***
## data$pamURB^4 -0.9925754 0.0017264 -574.94 <2e-16 ***
## data$pamURB^5 0.0491363 0.0016106 30.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 9737350 on 123 degrees of freedom
## Residual deviance: 6806056 on 116 degrees of freedom
## AIC: 6807355
##
## Number of Fisher Scoring iterations: 6
exp(confint(totalidad3))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 3.485875e+04 3.510783e+04
## data$CAPACIDADsinURB_ABC 9.434078e-01 9.442988e-01
## data$MEDIDAS2_ABC 9.147402e-01 9.159406e-01
## data$pamURB.L 1.289104e+01 1.305417e+01
## data$pamURB.Q 5.261973e-01 5.322446e-01
## data$pamURB.C 9.373911e-01 9.456859e-01
## data$pamURB^4 3.693688e-01 3.718770e-01
## data$pamURB^5 1.047053e+00 1.053685e+00
AER::dispersiontest(totalidad3,trafo=1) #significativa
##
## Overdispersion test
##
## data: totalidad3
## z = 2.0065, p-value = 0.0224
## alternative hypothesis: true alpha is greater than 0
## sample estimates:
## alpha
## 92310.97
#LINEALIDAD
plot(totalidad3, 1, main = c("Gráfico 2: Linealidad")) #diagonal, casi lineal
B. Homocedasticidad.
plot(totalidad3, 3, main = c("Gráfico 3: Homocedasticidad"))#diagonal
bptest(totalidad3) #valor P mayor a 0.05 Homocedasticidad
##
## studentized Breusch-Pagan test
##
## data: totalidad3
## BP = 7.2904, df = 7, p-value = 0.3993
c. Normalidad de residuos. Puntos cerca de la diagonal.
plot(totalidad3, 2, main = c("Gráfico 4: Normalidad de residuos")) #se alejan de diagonal
shapiro.test(totalidad3$residuals) #menor a 0.05 el valor P entonces indica que no hay normaldiad de residusos
##
## Shapiro-Wilk normality test
##
## data: totalidad3$residuals
## W = 0.57632, p-value < 2.2e-16
library(DescTools)
##
## Attaching package: 'DescTools'
## The following object is masked from 'package:car':
##
## Recode
## The following object is masked from 'package:BBmisc':
##
## %nin%
## The following objects are masked from 'package:psych':
##
## AUC, ICC, SD
VIF(totalidad3)
## GVIF Df GVIF^(1/(2*Df))
## data$CAPACIDADsinURB_ABC 1.311125 1 1.145044
## data$MEDIDAS2_ABC 1.062234 1 1.030647
## data$pamURB 1.356730 5 1.030978
5.2 ver valores influyentes Prestar atención al indice de Cook.
plot(totalidad3, 5, main = c("Gráfico 5: Identificación de valores influyentes"))
checktotalidad3=as.data.frame(influence.measures(totalidad3)$is.inf)
checktotalidad3[checktotalidad3$cook.d | checktotalidad3$hat,] #22 y 73