Pregunta 1 Data INEI:
library(rio)
inei=import("reporteEXA.xlsx")
names(inei)
## [1] "Código" "Provincia" "No usa electricidad"
## [4] "Sí usa electricidad" "No usa gas (balón GLP)" "Sí usa gas (balón GLP)"
## [7] "No usa carbón" "Sí usa carbón" "No usa leña"
## [10] "Sí usa leña"
Análisis factorial con los datos:
str(inei)
## 'data.frame': 197 obs. of 10 variables:
## $ Código : chr "101" "102" "103" "104" ...
## $ Provincia : chr "Amazonas, provincia: Chachapoyas" "Amazonas, provincia: Bagua" "Amazonas, provincia: Bongara" "Amazonas, provincia: Condorcanqui" ...
## $ No usa electricidad : num 14763 20313 7689 9853 13112 ...
## $ Sí usa electricidad : num 574 161 124 14 90 65 255 921 16 33 ...
## $ No usa gas (balón GLP): num 4696 10557 3154 8331 6863 ...
## $ Sí usa gas (balón GLP): num 10641 9917 4659 1536 6339 ...
## $ No usa carbón : num 15161 20185 7755 9841 13169 ...
## $ Sí usa carbón : num 176 289 58 26 33 26 335 218 4 4 ...
## $ No usa leña : num 7236 7357 2345 1059 1833 ...
## $ Sí usa leña : num 8101 13117 5468 8808 11369 ...
inei=inei[-c(197),]
names(inei)[names(inei)=="Código"]<-"Codigo"
inei$Codigo<-as.numeric(inei$Codigo)
str(inei$Codigo)
## num [1:196] 101 102 103 104 105 106 107 201 202 203 ...
summary(inei)
## Codigo Provincia No usa electricidad Sí usa electricidad
## Min. : 101.0 Length:196 Min. : 687 Min. : 1.00
## 1st Qu.: 507.8 Class :character 1st Qu.: 8176 1st Qu.: 57.75
## Median :1056.0 Mode :character Median : 15798 Median : 146.50
## Mean :1145.9 Mean : 40812 Mean : 1291.95
## 3rd Qu.:1702.2 3rd Qu.: 32071 3rd Qu.: 479.00
## Max. :2504.0 Max. :2228751 Max. :125199.00
## No usa gas (balón GLP) Sí usa gas (balón GLP) No usa carbón
## Min. : 572 Min. : 116 Min. : 600
## 1st Qu.: 4649 1st Qu.: 2740 1st Qu.: 8193
## Median : 7776 Median : 7402 Median : 15618
## Mean : 12711 Mean : 29393 Mean : 40983
## 3rd Qu.: 12664 3rd Qu.: 17661 3rd Qu.: 31502
## Max. :515782 Max. :1838168 Max. :2329530
## Sí usa carbón No usa leña Sí usa leña
## Min. : 4.0 Min. : 175 Min. : 510
## 1st Qu.: 33.0 1st Qu.: 1801 1st Qu.: 4987
## Median : 106.5 Median : 5155 Median : 9004
## Mean : 1120.6 Mean : 30459 Mean :11645
## 3rd Qu.: 348.2 3rd Qu.: 16494 3rd Qu.:15076
## Max. :29288.0 Max. :2310046 Max. :58210
sum(is.na(inei))
## [1] 0
library(psych)
library(GPArotation)
##
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
Seleccionando:
library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following object is masked from 'package:base':
##
## isFALSE
inei[,c(4,6,8,10)]=normalize(inei[,c(4,6,8,10)],method='standardize')
theData=inei[,c(4,6,8,10)]
library(magrittr)
head(theData,10)%>%
rmarkdown::paged_table()
library(polycor)
##
## Attaching package: 'polycor'
## The following object is masked from 'package:psych':
##
## polyserial
summary(theData)
## Sí usa electricidad Sí usa gas (balón GLP) Sí usa carbón Sí usa leña
## Min. :-0.14275 Min. :-0.21565 Min. :-0.2870 Min. :-1.1077
## 1st Qu.:-0.13647 1st Qu.:-0.19633 1st Qu.:-0.2795 1st Qu.:-0.6623
## Median :-0.12666 Median :-0.16198 Median :-0.2606 Median :-0.2628
## Mean : 0.00000 Mean : 0.00000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.:-0.08989 3rd Qu.:-0.08642 3rd Qu.:-0.1985 3rd Qu.: 0.3413
## Max. :13.70080 Max. :13.32348 Max. : 7.2387 Max. : 4.6322
library(DescTools)
##
## Attaching package: 'DescTools'
## The following object is masked from 'package:BBmisc':
##
## %nin%
## The following objects are masked from 'package:psych':
##
## AUC, ICC, SD
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(kableExtra)
cor(theData)
## Sí usa electricidad Sí usa gas (balón GLP) Sí usa carbón
## Sí usa electricidad 1.0000000 0.9920048 0.4873608
## Sí usa gas (balón GLP) 0.9920048 1.0000000 0.5269917
## Sí usa carbón 0.4873608 0.5269917 1.0000000
## Sí usa leña 0.2868231 0.3426981 0.4168478
## Sí usa leña
## Sí usa electricidad 0.2868231
## Sí usa gas (balón GLP) 0.3426981
## Sí usa carbón 0.4168478
## Sí usa leña 1.0000000
corMatrix=polycor::hetcor(theData)$correlations
round(corMatrix,2)
## Sí usa electricidad Sí usa gas (balón GLP) Sí usa carbón
## Sí usa electricidad 1.00 0.99 0.49
## Sí usa gas (balón GLP) 0.99 1.00 0.53
## Sí usa carbón 0.49 0.53 1.00
## Sí usa leña 0.29 0.34 0.42
## Sí usa leña
## Sí usa electricidad 0.29
## Sí usa gas (balón GLP) 0.34
## Sí usa carbón 0.42
## Sí usa leña 1.00
KMO
library(psych)
psych::KMO(corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA = 0.56
## MSA for each item =
## Sí usa electricidad Sí usa gas (balón GLP) Sí usa carbón
## 0.52 0.53 0.81
## Sí usa leña
## 0.52
cortest.bartlett(corMatrix,n=nrow(theData))$p.value>0.05
## [1] FALSE
library(matrixcalc)
is.singular.matrix(corMatrix)
## [1] FALSE
fa.parallel(theData, fa = 'fa',correct = T,plot = F)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
resfa <- fa(theData,
nfactors = 2,
cor = 'mixed',
rotate = "varimax",
fm="minres")
print(resfa$loadings)
##
## Loadings:
## MR1 MR2
## Sí usa electricidad 0.968 0.242
## Sí usa gas (balón GLP) 0.942 0.330
## Sí usa carbón 0.365 0.554
## Sí usa leña 0.130 0.667
##
## MR1 MR2
## SS loadings 1.974 0.919
## Proportion Var 0.494 0.230
## Cumulative Var 0.494 0.723
print(resfa$loadings,cutoff = 0.5)
##
## Loadings:
## MR1 MR2
## Sí usa electricidad 0.968
## Sí usa gas (balón GLP) 0.942
## Sí usa carbón 0.554
## Sí usa leña 0.667
##
## MR1 MR2
## SS loadings 1.974 0.919
## Proportion Var 0.494 0.230
## Cumulative Var 0.494 0.723
Cada variable se fue a un factor, es una estructura simple
Pregunta 2
covid=import("dataOK_all.xlsx")
## New names:
## • `` -> `...1`
names(covid)
## [1] "...1" "key"
## [3] "Código" "pared1_Ladrillo"
## [5] "pared2_Piedra" "pared3_Adobe"
## [7] "pared4_Tapia" "pared5_Quincha"
## [9] "pared6_Piedra" "pared7_Madera"
## [11] "pared8_Triplay" "pared9_Otro"
## [13] "pared10_Total" "techo1_Concreto"
## [15] "techo2_Madera" "techo3_Tejas"
## [17] "techo4_Planchas" "techo5_Caña"
## [19] "techo6_Triplay" "techo7_Paja"
## [21] "techo8_Otro" "techo9_Total"
## [23] "piso1_Parquet" "piso2_Láminas"
## [25] "piso3_Losetas" "piso4_Madera"
## [27] "piso5_Cemento" "piso6_Tierra"
## [29] "piso7_Otro" "piso8_Total"
## [31] "agua1_Red" "agua2_Red_fueraVivienda"
## [33] "agua3_Pilón" "agua4_Camión"
## [35] "agua5_Pozo" "agua6_Manantial"
## [37] "agua7_Río" "agua8_Otro"
## [39] "agua9_Vecino" "agua10_Total"
## [41] "elec1_Sí" "elec2_No"
## [43] "elec3_Total" "departamento"
## [45] "provincia" "Castillo"
## [47] "Keiko" "ganaCastillo"
## [49] "covidPositivos" "covidFallecidos"
covidk=covid[,c(31,40,43,45,46,47,48,49,50)]
covid[,c(31,40,43,45,46,47,48,49,50)]=normalize(covid[,c(31,40,43,45,46,47,48,49,50)],method='standardize')
tasa de fallecidos por cada 1000
covidk$tasa=covidk$covidPositivos/covidk$elec3_Total*1000
str(covidk$tasa)
## num [1:196] 416.3 53.2 155.3 358.3 35.7 ...
Pregunta 3 Data CIA
library (rio)
elec=import("Electricity - installed generating capacity.csv")
names(elec)
## [1] "name" "slug" "kW"
## [4] "date_of_information" "ranking" "region"
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
##
## group_rows
## The following objects are masked from 'package:BBmisc':
##
## coalesce, collapse, symdiff
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df=elec%>%
select("name", "kW")
produc=import("Refined petroleum products - production.csv")
names(produc)
## [1] "name" "slug" "bbl/day"
## [4] "date_of_information" "ranking" "region"
df2=produc%>%
select("name", "bbl/day")
carbon=import("Carbon dioxide emissions.csv")
names(carbon)
## [1] "name" "slug" "metric tonnes of CO2"
## [4] "date_of_information" "ranking" "region"
df3=carbon%>%
select("name", "metric tonnes of CO2")
energy=import("Energy consumption per capita.csv")
names(energy)
## [1] "name" "slug" "Btu/person"
## [4] "date_of_information" "ranking" "region"
df4=energy%>%
select("name", "Btu/person")
telefix=import("Telephones - fixed lines.csv")
names(telefix)
## [1] "name" "slug" "value"
## [4] "date_of_information" "ranking" "region"
df5=telefix%>%
select("name", "value")
names(df5)[names(df5)=="value"]="valuefix"
telemob=import("Telephones - mobile cellular.csv")
names(telemob)
## [1] "name" "slug" "value"
## [4] "date_of_information" "ranking" "region"
df6=telemob%>%
select("name", "value")
names(df6)[names(df6)=="value"]="valuemob"
broad=import("Broadband - fixed subscriptions.csv")
names(broad)
## [1] "name" "slug" "value"
## [4] "date_of_information" "ranking" "region"
df11=broad%>%
select("name", "value")
names(df11)[names(df11)=="value"]="valuebroad"
inflat=import("Inflation rate (consumer prices).csv")
names(inflat)
## [1] "name" "slug" "%"
## [4] "date_of_information" "ranking" "region"
df7=inflat%>%
select("name", "%")
names(df7)[names(df7)=="%"]="% of inflation"
youth=import("Youth unemployment rate (ages 15-24).csv")
names(youth)
## [1] "name" "slug" "%"
## [4] "date_of_information" "ranking" "region"
df8=youth%>%
select("name", "%")
names(df8)[names(df8)=="%"]="% of youth unemployment"
debt=import("Public debt.csv")
names(debt)
## [1] "name" "slug" "% of GDP"
## [4] "date_of_information" "ranking" "region"
df9=debt%>%
select("name", "% of GDP")
names(df9)[names(df9)=="% of GDP"]="% public debt"
debtex=import("Public debt.csv")
names(debtex)
## [1] "name" "slug" "% of GDP"
## [4] "date_of_information" "ranking" "region"
df10=debtex%>%
select("name", "% of GDP")
names(df10)[names(df10)=="% of GDP"]="% public debt-external"
datafinal<-merge(df, df2, by = "name")
cia=import("datafinal.xlsx")
names(cia)
## [1] "name" "kW"
## [3] "bbl/day" "metric tonnes of CO2"
## [5] "Btu/person" "valuefix"
## [7] "valuemob" "% of inflation"
## [9] "% of youth unemployment" "% public debt"
## [11] "% public debt-external" "valuebroad"
str(cia)
## 'data.frame': 181 obs. of 12 variables:
## $ name : chr "Afghanistan" "Albania" "Algeria" "Angola" ...
## $ kW : chr "776,000" "2,531,000" "21,694,000" "7,344,000" ...
## $ bbl/day : chr "0" "5,638" "627,900" "53,480" ...
## $ metric tonnes of CO2 : chr "7,893,000" "3,794,000" "151,633,000" "19,362,000" ...
## $ Btu/person : chr "3,227,000" "38,442,000" "61,433,000" "11,693,000" ...
## $ valuefix : chr "146,000" "177,000" "5,576,000" "94,000" ...
## $ valuemob : chr "22,678,000" "2,782,000" "49,019,000" "23,978,000" ...
## $ % of inflation : chr "2.3" "6.73" "9.27" "25.75" ...
## $ % of youth unemployment: num 20.2 27.8 31.9 18.5 29.9 36.1 10.8 11.4 16.5 30.8 ...
## $ % public debt : num 7 82.4 27.5 65 57.6 ...
## $ % public debt-external : num 7 82.4 27.5 65 57.6 ...
## $ valuebroad : chr "26,570" "508,937" "3,790,459" "230,610" ...
cia$`metric tonnes of CO2`<- gsub(",", "", cia$`metric tonnes of CO2`)
str(cia$`metric tonnes of CO2`)
## chr [1:181] "7893000" "3794000" "151633000" "19362000" "193205000" ...
cia$CO2<- as.numeric(cia$`metric tonnes of CO2`)
str(cia$CO2)
## num [1:181] 7.89e+06 3.79e+06 1.52e+08 1.94e+07 1.93e+08 ...
valuefix-fixed lines
cia$valuefix<- gsub(",", "", cia$valuefix)
cia$fixed<- as.numeric(cia$valuefix)
str(cia$fixed)
## num [1:181] 146000 177000 5576000 94000 7615000 ...
cia$kW <- gsub(",", "", cia$kW)
cia$elecap <- as.numeric(cia$kW)
str(cia$elecap)
## num [1:181] 776000 2531000 21694000 7344000 44731000 ...
Regresión: fixlines y co2 Independientes: energy = kw, bbl/day, btu/person communicatios = valuefix, valuemob, value broad
Dependiente: economy = % of inflation, % youth, % public debt
modelo1<-lm(cia$`% public debt` ~ CO2+fixed, data=cia)
summary(modelo1)
##
## Call:
## lm(formula = cia$`% public debt` ~ CO2 + fixed, data = cia)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63.258 -21.083 -4.635 15.639 174.016
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.348e+01 2.669e+00 20.034 < 2e-16 ***
## CO2 -4.262e-08 8.138e-09 -5.237 4.57e-07 ***
## fixed 2.618e-06 4.463e-07 5.865 2.14e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34.09 on 178 degrees of freedom
## Multiple R-squared: 0.1645, Adjusted R-squared: 0.1551
## F-statistic: 17.53 on 2 and 178 DF, p-value: 1.127e-07