## QUESTION 1
library(readxl)

# Load the raw report. When this was run, read_excel() emitted a "New names"
# message because the header cells were empty: the unnamed columns were
# auto-named `...2` through `...11`.
data1 <- read_excel("reporte.xlsx")

# View() opens an interactive data viewer, so only call it in an interactive
# session; this keeps the script runnable with Rscript or knitr.
if (interactive()) {
  View(data1)
}
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Data cleaning :)
# 1. Drop the first column of the raw sheet.
data1 <- select(data1, -1)
# 2. Drop the first four and the last four rows
#    (presumably report header/footer rows -- confirm against the workbook).
data1 <- data1[-c(seq_len(4), seq(nrow(data1) - 3, nrow(data1))), ]
# 3. Promote the first remaining row to column names.
names(data1) <- data1[1, ]
# 4. Remove that header row and the first remaining column in one step.
data1 <- data1[-1, -1]
#Porcentajes de los conteos positivos para todos los casos
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Columns left out of the analysis: the four "No usa ..." columns and
# "Departamento".
dontselect <- c(
  "No usa electricidad",
  "No usa gas (balón GLP)",
  "No usa carbón",
  "No usa leña",
  "Departamento"
)
# Every other column of data1 is kept, in its original order.
select <- setdiff(x = names(data1), y = dontselect)
theData <- data1[, select]
library(magrittr)
# Preview the first ten rows as a paged table.
rmarkdown::paged_table(head(theData, n = 10))
library(polycor)
##
## Attaching package: 'polycor'
## The following object is masked from 'package:psych':
##
## polyserial
# Inspect the structure. In the recorded run str() reported a 25 x 4 tibble
# whose four columns were all character (counts stored as text).
str(theData)
# Convert every column to numeric. Assigning into theData[] replaces the
# column contents while keeping the object and its column names.
theData[] <- lapply(
  X = theData,
  FUN = function(column) as.numeric(as.character(column))
)
# Recorded run: "tbl_df" "tbl" "data.frame" (still a tibble at this point).
class(theData)
# Continue with a plain data.frame.
theData <- as.data.frame(theData)
# Correlation matrix of the four numeric columns, taken from the
# "correlations" element of the hetcor() result.
corMatrix <- polycor::hetcor(theData)[["correlations"]]
print(corMatrix)
# Same matrix rounded to two decimals. Recorded run:
#
#                           electricidad   gas   carbón   leña
#   Sí usa electricidad             1.00  0.99     0.29   0.08
#   Sí usa gas (balón GLP)          0.99  1.00     0.34   0.15
#   Sí usa carbón                   0.29  0.34     1.00   0.21
#   Sí usa leña                     0.08  0.15     0.21   1.00
round(corMatrix, digits = 2)
library(ggcorrplot)
# Plot of the correlation matrix.
ggcorrplot::ggcorrplot(corMatrix)
# Kaiser-Meyer-Olkin factor adequacy computed from the correlation matrix.
# Recorded run: overall MSA = 0.44; per item: electricidad 0.46,
# gas (balón GLP) 0.46, carbón 0.63, leña 0.13.
psych::KMO(r = corMatrix)
# Bartlett test on the correlation matrix, using the number of rows of
# theData as the sample size. Prints TRUE when the p-value exceeds 0.05.
# Recorded run: FALSE.
psych::cortest.bartlett(R = corMatrix, n = nrow(theData))[["p.value"]] > 0.05
library(matrixcalc)
# Singularity check on the correlation matrix. Recorded run: FALSE.
matrixcalc::is.singular.matrix(corMatrix)
# Parallel analysis to suggest how many factors to extract (factor solution
# only, no scree plot). TRUE/FALSE are spelled out because T and F are
# ordinary variables that can be reassigned.
# NOTE(review): in psych::fa.parallel() `correct` is documented as a numeric
# continuity correction (default 0.5) used for tetrachoric/polychoric
# correlations; a logical is kept here to preserve the original call --
# confirm whether the argument is needed at all.
fa.parallel(theData, fa = "fa", correct = TRUE, plot = FALSE)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Parallel analysis suggests that the number of factors = 1 and the number of components = NA
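# RESULTADO: No se puede aplicar rotación varimax ni oblimin porque
# fa.parallel sugiere un solo factor. Además, salvo electricidad y gas (0.99),
# las correlaciones entre las variables son bajas, el KMO global es bajo (0.44)
# y el p-valor de la prueba de Bartlett es menor a 0.05.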
## QUESTION 2
library(readxl)

# Load the second data set. In the recorded run read_excel() reported that the
# first column had no header and was auto-named `...1`.
data2 <- read_excel("dataOK_all.xlsx")

# View() opens an interactive data viewer, so only call it in an interactive
# session; this keeps the script runnable with Rscript or knitr.
if (interactive()) {
  View(data2)
}
# Utilizando el porcentaje de viviendas que tienen agua de red pública dentro
# de la vivienda (agua1_Red), la razón de votación de Keiko entre Castillo y la
# tasa de fallecidos por cada 1000 contagiados, sin Lima.
# --- Derived indicators -----------------------------------------------------
# agua1_Red becomes a proportion of agua10_Total. The column is overwritten in
# place, so this line must not be re-run on already-transformed data.
data2$agua1_Red <- data2$agua1_Red / data2$agua10_Total
# Ratio of Castillo to Keiko.
# NOTE(review): the task statement words this as "Keiko entre Castillo";
# confirm which direction is intended.
data2$CastilloKeiko_ratio <- data2$Castillo / data2$Keiko
# Deaths per 1000 positive cases.
# NOTE(review): the column name says "x10000" but the factor used is 1000. The
# name is kept because the column is selected by name below.
data2$fallecido_x10000POS <-
  1000 * data2$countFallecidos / data2$countPositivos

# --- Subset used for clustering ---------------------------------------------
keptForCluster <- c(
  "key", "agua1_Red", "CastilloKeiko_ratio", "fallecido_x10000POS"
)
data2_small <- data2[, keptForCluster]
# Exclude the row whose key is LIMA+LIMA.
data2_small <- data2_small[!data2_small$key == "LIMA+LIMA", ]

# --- Standardisation --------------------------------------------------------
# Standardise the three indicators once (column 1 is the key) and reuse the
# result for both the boxplot and the correlation matrix.
data2_small_norm <- data2_small
data2_small_norm[, -1] <- BBmisc::normalize(
  data2_small[, -1],
  method = "standardize"
)
boxplot(data2_small_norm[, -1])
# Recorded run:
#                        agua1_Red  CastilloKeiko_ratio  fallecido_x10000POS
#   agua1_Red                1.000               -0.323                0.104
#   CastilloKeiko_ratio     -0.323                1.000                0.153
#   fallecido_x10000POS      0.104                0.153                1.000
cor(data2_small_norm[, -1], use = "complete.obs")

# --- Clustering input -------------------------------------------------------
# Convert to a plain data.frame before attaching the keys as row names:
# setting row names on a tibble is deprecated and tibble subsetting discards
# them, so the keys would not survive the complete-cases filter below.
dataClus <- as.data.frame(data2_small_norm[, -1])
row.names(dataClus) <- data2_small_norm$key
dataClus <- dataClus[complete.cases(dataClus), ]
# Pairwise Gower dissimilarities between the rows of dataClus.
g.dist <- cluster::daisy(dataClus, metric = "gower")

# Gap-statistic plots (up to 10 clusters) for the two hierarchical strategies.
# verbose is spelled FALSE because F is an ordinary, reassignable variable.
# DIANA
factoextra::fviz_nbclust(
  dataClus,
  factoextra::hcut,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "diana"
)
# AGNES
factoextra::fviz_nbclust(
  dataClus,
  factoextra::hcut,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "agnes"
)
# AGNES clustering on the Gower distances, cut into 7 clusters.
res.agnes <- factoextra::hcut(
  g.dist,
  k = 7,
  hc_func = "agnes",
  hc_method = "ward.D"
)
# Store each row's cluster assignment next to the clustering variables.
dataClus$agnes <- res.agnes$cluster
library(magrittr)
# Silhouette widths per observation; keep the row labels in a `country` column.
silAGNES <- data.frame(res.agnes$silinfo$widths)
silAGNES$country <- row.names(silAGNES)
# Labels of the observations with a negative silhouette width, sorted.
# This must select the `country` column created above: asking for a column
# that does not exist in silAGNES silently yields NULL instead of the labels.
poorAGNES <- silAGNES[silAGNES$sil_width < 0, "country"] %>%
  sort()
poorAGNES
# RESULTADO: AGNES (aglomerativa) es mejor porque separa los casos en varios
# grupos, mientras que DIANA los agrupa todos en uno solo.
## QUESTION 3: run 3 regressions. The independent variables are all the
## variables in COMMs and ENERGY. Each regression uses a different dependent
## variable from the ECON group, excluding external debt.
library(readxl)

# Load the data set used for the regressions.
data3 <- read_excel("datafinal.xlsx")

# View() opens an interactive data viewer, so only call it in an interactive
# session; this keeps the script runnable with Rscript or knitr.
if (interactive()) {
  View(data3)
}
# Variable                               Description
# Inflation rate (consumer prices)       % of inflation
# Youth unemployment rate (ages 15-24)   % of youth unemployment
# Public debt                            % public debt
# Debt - external                        % public debt - external