##PREGUNTA 1

library(readxl)
# Read the raw report. Several header cells are empty, so readxl assigns
# positional names to those columns (see the "New names" message below).
data1 <- read_excel("reporte.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
# Open the data viewer only in interactive sessions, so the script can also be
# run in batch mode (Rscript, rendering) where no viewer is available.
if (interactive()) {
  View(data1)
}
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

#limpieza de data :)

# Drop the first column of the raw sheet.
data1 <- dplyr::select(data1, -1)

# Discard the first four and the last four rows.
data1 <- data1[-c(seq_len(4), seq(nrow(data1) - 3, nrow(data1))), ]

# Promote the first remaining row to column names, then remove that row
# together with the first remaining column.
colnames(data1) <- data1[1, ]
data1 <- data1[-1, -1]

#Porcentajes de los conteos positivos para todos los casos

library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Columns excluded from the analysis: the "No usa ..." counts and the
# department label.
dontselect <- c(
  "No usa electricidad", "No usa gas (balón GLP)", "No usa carbón",
  "No usa leña", "Departamento"
)

# Every other column of the cleaned table is kept.
select <- setdiff(names(data1), dontselect)
theData <- data1[, select]

library(magrittr)
# Show the first ten rows as a paged table.
rmarkdown::paged_table(head(theData, 10))
library(polycor)
## 
## Attaching package: 'polycor'
## The following object is masked from 'package:psych':
## 
##     polyserial
# Inspect the structure: every column was read as character.
str(theData)
## tibble [25 × 4] (S3: tbl_df/tbl/data.frame)
##  $ Sí usa electricidad   : chr [1:25] "1283" "6837" "1360" "12878" ...
##  $ Sí usa gas (balón GLP): chr [1:25] "52951" "188443" "56444" "382813" ...
##  $ Sí usa carbón         : chr [1:25] "943" "2408" "471" "1789" ...
##  $ Sí usa leña           : chr [1:25] "74920" "149287" "89197" "58115" ...

# Convert each column to numeric in place, keeping the column names.
theData[] <- lapply(
  theData,
  function(column) as.numeric(as.character(column))
)
class(theData)
## [1] "tbl_df"     "tbl"        "data.frame"

# Switch from tibble to a plain data.frame before computing correlations.
theData <- as.data.frame(theData)
corMatrix <- hetcor(theData)[["correlations"]]
print(corMatrix)
##                        Sí usa electricidad Sí usa gas (balón GLP) Sí usa carbón
## Sí usa electricidad             1.00000000              0.9904857     0.2937747
## Sí usa gas (balón GLP)          0.99048568              1.0000000     0.3352982
## Sí usa carbón                   0.29377471              0.3352982     1.0000000
## Sí usa leña                     0.07598315              0.1468580     0.2094292
##                        Sí usa leña
## Sí usa electricidad     0.07598315
## Sí usa gas (balón GLP)  0.14685797
## Sí usa carbón           0.20942915
## Sí usa leña             1.00000000

# Same matrix rounded to two decimals for easier reading.
round(corMatrix, digits = 2)
##                        Sí usa electricidad Sí usa gas (balón GLP) Sí usa carbón
## Sí usa electricidad                   1.00                   0.99          0.29
## Sí usa gas (balón GLP)                0.99                   1.00          0.34
## Sí usa carbón                         0.29                   0.34          1.00
## Sí usa leña                           0.08                   0.15          0.21
##                        Sí usa leña
## Sí usa electricidad           0.08
## Sí usa gas (balón GLP)        0.15
## Sí usa carbón                 0.21
## Sí usa leña                   1.00

# Plot the correlation matrix.
library(ggcorrplot)
ggcorrplot(corMatrix)

# Kaiser-Meyer-Olkin sampling adequacy, computed from the correlation matrix.
psych::KMO(corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA =  0.44
## MSA for each item = 
##    Sí usa electricidad Sí usa gas (balón GLP)          Sí usa carbón 
##                   0.46                   0.46                   0.63 
##            Sí usa leña 
##                   0.13

# Bartlett's test on the correlation matrix: prints TRUE when the p-value is
# above 0.05 and FALSE otherwise.
cortest.bartlett(corMatrix, n = nrow(theData))$p.value > 0.05
## [1] FALSE

# Check whether the correlation matrix is singular.
library(matrixcalc)
is.singular.matrix(corMatrix)
## [1] FALSE

# Parallel analysis for the number of factors, without drawing the scree plot.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
fa.parallel(theData, fa = "fa", correct = TRUE, plot = FALSE)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Parallel analysis suggests that the number of factors =  1  and the number of components =  NA

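RESULTADO: No se puede aplicar rotación varimax ni oblimin porque fa.parallel sugiere extraer un solo factor. Además, la adecuación muestral es baja (KMO global = 0.44), aunque la prueba de Bartlett resulta significativa (p-valor menor a 0.05).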

##PREGUNTA 2

library(readxl)
# Read the data for question 2. The first header cell is empty, so readxl
# names that column `...1` (see the message below).
data2 <- read_excel("dataOK_all.xlsx")
## New names:
## • `` -> `...1`
# Open the data viewer only in interactive sessions, so the script can also be
# run in batch mode (Rscript, rendering) where no viewer is available.
if (interactive()) {
  View(data2)
}

Utilizando el porcentaje de viviendas que tienen agua de red pública dentro de la vivienda (agua1_Red), la razón de votación de Keiko entre Castillo y la tasa de fallecidos por cada 1000 contagiados, sin incluir Lima.

# Share of agua1_Red over agua10_Total; this overwrites the raw agua1_Red
# column.
data2$agua1_Red <- data2$agua1_Red / data2$agua10_Total
# NOTE(review): this is Castillo divided by Keiko, while the accompanying text
# describes Keiko over Castillo; confirm the intended direction.
data2$CastilloKeiko_ratio <- data2$Castillo / data2$Keiko
# Deaths per 1000 positive cases.
# NOTE(review): the column name says "x10000" but the factor used is 1000.
data2$fallecido_x10000POS <- 1000 * data2$countFallecidos / data2$countPositivos

# Keep the key plus the three indicators, and leave out the LIMA+LIMA row.
keptForCluster <- c(
  "key", "agua1_Red", "CastilloKeiko_ratio", "fallecido_x10000POS"
)
data2_small <- data2[, keptForCluster]
data2_small <- data2_small[data2_small$key != "LIMA+LIMA", ]

# Standardise every column except the key (first column), then inspect the
# standardised indicators with a boxplot and their pairwise correlations.
data2_small_norm <- data2_small
data2_small_norm[, -1] <- BBmisc::normalize(
  data2_small[, -1],
  method = "standardize"
)
boxplot(data2_small_norm[, -1])

cor(data2_small_norm[, -1], use = "complete.obs")
##                      agua1_Red CastilloKeiko_ratio fallecido_x10000POS
## agua1_Red            1.0000000          -0.3226139           0.1035734
## CastilloKeiko_ratio -0.3226139           1.0000000           0.1532277
## fallecido_x10000POS  0.1035734           0.1532277           1.0000000
# Build the clustering input as a plain data.frame. Assigning row names to a
# tibble is deprecated and tibble subsetting with `[` drops them, so the key
# labels would be lost at the complete-case filter below.
dataClus <- as.data.frame(data2_small_norm[, -1])
row.names(dataClus) <- data2_small_norm$key

# Keep only rows without missing values; a data.frame preserves its row names
# here, so every remaining observation stays labelled by its key.
dataClus <- dataClus[complete.cases(dataClus), , drop = FALSE]

# Gower dissimilarities between the remaining observations.
g.dist <- cluster::daisy(dataClus, metric = "gower")

# DIANA (divisive): gap-statistic plot for up to 10 clusters, using the Gower
# dissimilarities. FALSE is spelled out because F is a reassignable variable.
factoextra::fviz_nbclust(
  dataClus, factoextra::hcut,
  diss = g.dist, method = "gap_stat", k.max = 10,
  verbose = FALSE, hc_func = "diana"
)

# AGNES (agglomerative): same gap-statistic plot with the agglomerative
# algorithm.
factoextra::fviz_nbclust(
  dataClus, factoextra::hcut,
  diss = g.dist, method = "gap_stat", k.max = 10,
  verbose = FALSE, hc_func = "agnes"
)

# Agglomerative clustering (AGNES, Ward linkage) on the Gower dissimilarities.
# NOTE(review): k = 7 is hard-coded; presumably read off the gap-statistic
# plot -- confirm.
res.agnes <- factoextra::hcut(
  g.dist,
  k = 7, hc_func = "agnes", hc_method = "ward.D"
)
dataClus$agnes <- res.agnes$cluster

library(magrittr)
# Silhouette widths per observation; the row names are copied into a label
# column so they survive the filtering below.
silAGNES <- data.frame(res.agnes$silinfo$widths)
silAGNES$country <- row.names(silAGNES)

# Observations with a negative silhouette width (poorly clustered).
# The label column is `country`; the previous lookup asked for a column named
# "Provincia", which does not exist in silAGNES, so it silently returned NULL.
poorAGNES <- silAGNES[silAGNES$sil_width < 0, "country"] %>% sort()
poorAGNES

RESULTADO: AGNES (aglomerativa) es mejor porque separa las observaciones en varios grupos, mientras que DIANA no lo hace y las agrupa en uno solo.

##PREGUNTA 3: Hacer 3 regresiones. Como independientes se usarán todas las variables de COMMs y ENERGY. Cada regresión usa una variable dependiente diferente del grupo ECON, pero no se usa la deuda externa.

library(readxl)
# Read the data for question 3.
data3 <- read_excel("datafinal.xlsx")
# Open the data viewer only in interactive sessions, so the script can also be
# run in batch mode (Rscript, rendering) where no viewer is available.
if (interactive()) {
  View(data3)
}

Variables del grupo ECON (variable: unidad): Inflation rate (consumer prices): % of inflation; Youth unemployment rate (ages 15-24): % of youth unemployment; Public debt: % public debt; Debt - external: % public debt - external.