###1. ENERGY
###1.1. Electricity
library(rio)
electricity = import("Electricity.csv")
names(electricity)
## [1] "name" "slug" "kW"
## [4] "date_of_information" "ranking" "region"
str(electricity)
## 'data.frame': 213 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Japan" ...
## $ slug : chr "china" "united-states" "india" "japan" ...
## $ kW : chr "2,217,925,000" "1,143,266,000" "432,768,000" "348,666,000" ...
## $ date_of_information: int 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "East and Southeast Asia" ...
#install.packages("readr")
library(readr)
electricity$kW <- parse_number(electricity$kW)
#electricity$kW <- as.integer(electricity$kW) este lo pasa a entero pero borra el valor de china (?)
str(electricity)
## 'data.frame': 213 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Japan" ...
## $ slug : chr "china" "united-states" "india" "japan" ...
## $ kW : num 2.22e+09 1.14e+09 4.33e+08 3.49e+08 2.76e+08 ...
## $ date_of_information: int 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "East and Southeast Asia" ...
electricity <- electricity[, !(names(electricity) %in% c("slug", "date_of_information", "ranking", "region"))]
###1.2. Refined_petroleum_products_production
re_petroleum = import("Refined_petroleum_products_production.csv")
names(re_petroleum)
## [1] "name" "slug" "bbl/day"
## [4] "date_of_information" "ranking" "region"
str(re_petroleum)
## 'data.frame': 216 obs. of 6 variables:
## $ name : chr "United States" "China" "Russia" "India" ...
## $ slug : chr "united-states" "china" "russia" "india" ...
## $ bbl/day : chr "20,300,000" "11,510,000" "6,076,000" "4,897,000" ...
## $ date_of_information: int 2017 2015 2015 2015 2017 2017 2015 2015 2017 2017 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "East and Southeast Asia" "Central Asia" "South Asia" ...
re_petroleum$"bbl/day" <- parse_number(re_petroleum$"bbl/day")
str(re_petroleum)
## 'data.frame': 216 obs. of 6 variables:
## $ name : chr "United States" "China" "Russia" "India" ...
## $ slug : chr "united-states" "china" "russia" "india" ...
## $ bbl/day : num 20300000 11510000 6076000 4897000 3467000 ...
## $ date_of_information: int 2017 2015 2015 2015 2017 2017 2015 2015 2017 2017 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "East and Southeast Asia" "Central Asia" "South Asia" ...
re_petroleum <- re_petroleum[, !(names(re_petroleum) %in% c("slug", "date_of_information", "ranking", "region"))]
###1.3. Carboon dioxide emissions
carbon_emissions = import("Carbon_dioxide_emissions.csv")
names(carbon_emissions)
## [1] "name" "slug" "metric tonnes of CO2"
## [4] "date_of_information" "ranking" "region"
str(carbon_emissions)
## 'data.frame': 218 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Russia" ...
## $ slug : chr "china" "united-states" "india" "russia" ...
## $ metric tonnes of CO2: chr "10,773,248,000" "5,144,361,000" "2,314,738,000" "1,848,070,000" ...
## $ date_of_information : int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "Central Asia" ...
carbon_emissions$"metric tonnes of CO2" <- parse_number(carbon_emissions$"metric tonnes of CO2")
str(carbon_emissions)
## 'data.frame': 218 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Russia" ...
## $ slug : chr "china" "united-states" "india" "russia" ...
## $ metric tonnes of CO2: num 1.08e+10 5.14e+09 2.31e+09 1.85e+09 1.10e+09 ...
## $ date_of_information : int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "Central Asia" ...
carbon_emissions <- carbon_emissions[, !(names(carbon_emissions) %in% c("slug", "date_of_information", "ranking", "region"))]
###1.4. Energy consumption per capita
energy_consumption = import("Energy_consumption_per_capita.csv")
names(energy_consumption)
## [1] "name" "slug" "Btu/person"
## [4] "date_of_information" "ranking" "region"
str(energy_consumption)
## 'data.frame': 212 obs. of 6 variables:
## $ name : chr "Qatar" "Singapore" "Bahrain" "United Arab Emirates" ...
## $ slug : chr "qatar" "singapore" "bahrain" "united-arab-emirates" ...
## $ Btu/person : chr "723,582,000" "639,951,000" "547,976,000" "471,788,000" ...
## $ date_of_information: int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Middle East" "East and Southeast Asia" "Middle East" "Middle East" ...
energy_consumption$"Btu/person" <- parse_number(energy_consumption$"Btu/person")
str(energy_consumption)
## 'data.frame': 212 obs. of 6 variables:
## $ name : chr "Qatar" "Singapore" "Bahrain" "United Arab Emirates" ...
## $ slug : chr "qatar" "singapore" "bahrain" "united-arab-emirates" ...
## $ Btu/person : num 7.24e+08 6.40e+08 5.48e+08 4.72e+08 4.15e+08 ...
## $ date_of_information: int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Middle East" "East and Southeast Asia" "Middle East" "Middle East" ...
energy_consumption <- energy_consumption[, !(names(energy_consumption) %in% c("slug", "date_of_information", "ranking", "region"))]
###2. COMMUNICATIONS
###2.1. Telephones_fixed_lines
telephones_fixed = import("Telephones_fixed_lines.csv")
names(telephones_fixed)
## [1] "name" "slug" "value"
## [4] "date_of_information" "ranking" "region"
str(telephones_fixed)
## 'data.frame': 224 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Germany" ...
## $ slug : chr "china" "united-states" "japan" "germany" ...
## $ value : chr "179,414,000" "91,623,000" "60,721,000" "38,580,000" ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "Europe" ...
telephones_fixed$value <- parse_number(telephones_fixed$value)
str(telephones_fixed)
## 'data.frame': 224 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Germany" ...
## $ slug : chr "china" "united-states" "japan" "germany" ...
## $ value : num 1.79e+08 9.16e+07 6.07e+07 3.86e+07 3.77e+07 ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "Europe" ...
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
telephones_fixed <- telephones_fixed %>%
rename(value_telefixed = value)
telephones_fixed <- telephones_fixed[, !(names(telephones_fixed) %in% c("slug", "date_of_information", "ranking", "region"))]
###2.2. Telephones_mobile_cellular
telephones_mobile = import("Telephones_mobile_cellular.csv")
names(telephones_mobile)
## [1] "name" "slug" "value"
## [4] "date_of_information" "ranking" "region"
str(telephones_mobile)
## 'data.frame': 225 obs. of 6 variables:
## $ name : chr "China" "India" "United States" "Indonesia" ...
## $ slug : chr "china" "india" "united-states" "indonesia" ...
## $ value : chr "1,781,000,000" "1,143,000,000" "372,682,000" "316,553,000" ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "South Asia" "North America" "East and Southeast Asia" ...
telephones_mobile$value <- parse_number(telephones_mobile$value)
str(telephones_mobile)
## 'data.frame': 225 obs. of 6 variables:
## $ name : chr "China" "India" "United States" "Indonesia" ...
## $ slug : chr "china" "india" "united-states" "indonesia" ...
## $ value : num 1.78e+09 1.14e+09 3.73e+08 3.17e+08 2.45e+08 ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "South Asia" "North America" "East and Southeast Asia" ...
telephones_mobile <- telephones_mobile %>%
rename(value_telemobi = value)
telephones_mobile <- telephones_mobile[, !(names(telephones_mobile) %in% c("slug", "date_of_information", "ranking", "region"))]
###2.3. Broadband_fixed_subscriptions.csv
broadband_fixed = import("Broadband_fixed_subscriptions.csv")
names(broadband_fixed)
## [1] "name" "slug" "value"
## [4] "date_of_information" "ranking" "region"
str(broadband_fixed)
## 'data.frame': 214 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Brazil" ...
## $ slug : chr "china" "united-states" "japan" "brazil" ...
## $ value : chr "483,549,500" "121,176,000" "44,000,791" "36,344,670" ...
## $ date_of_information: chr "2020" "2020" "2020" "2020" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "South America" ...
broadband_fixed$value <- parse_number(broadband_fixed$value)
str(broadband_fixed)
## 'data.frame': 214 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Brazil" ...
## $ slug : chr "china" "united-states" "japan" "brazil" ...
## $ value : num 4.84e+08 1.21e+08 4.40e+07 3.63e+07 3.62e+07 ...
## $ date_of_information: chr "2020" "2020" "2020" "2020" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "South America" ...
broadband_fixed <- broadband_fixed %>%
rename(value_broad = value)
broadband_fixed <- broadband_fixed[, !(names(broadband_fixed) %in% c("slug", "date_of_information", "ranking", "region"))]
###3. ECONOMY
###3.1. Inflation Rate
inflation_rate = import("Inflation rate (consumer prices).csv")
names(inflation_rate)
## [1] "name" "slug" "%"
## [4] "date_of_information" "ranking" "region"
str(inflation_rate)
## 'data.frame': 221 obs. of 6 variables:
## $ name : chr "South Sudan" "Andorra" "Dominica" "American Samoa" ...
## $ slug : chr "south-sudan" "andorra" "dominica" "american-samoa" ...
## $ % : chr "-6.69" "-0.9" "-0.73" "-0.5" ...
## $ date_of_information: chr "2022" "2015" "2020" "2015" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Africa" "Europe" "Central America and the Caribbean" "Australia and Oceania" ...
#inflation_rate$"%" <- parse_number(inflation_rate$"%", locale = locale(decimal_mark = ".", grouping_mark = " "))
#str(inflation_rate)
inflation_rate$"%" <- readr::parse_number(gsub("(?<!\\d),(?!\\d)|\\.(?!\\d)", "", inflation_rate$"%", perl = TRUE))
str(inflation_rate)
## 'data.frame': 221 obs. of 6 variables:
## $ name : chr "South Sudan" "Andorra" "Dominica" "American Samoa" ...
## $ slug : chr "south-sudan" "andorra" "dominica" "american-samoa" ...
## $ % : num -6.69 -0.9 -0.73 -0.5 -0.4 -0.3 0 0 0.3 0.3 ...
## $ date_of_information: chr "2022" "2015" "2020" "2015" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Africa" "Europe" "Central America and the Caribbean" "Australia and Oceania" ...
inflation_rate <- inflation_rate %>%
rename("%_inflation" = "%")
inflation_rate <- inflation_rate[, !(names(inflation_rate) %in% c("slug", "date_of_information", "ranking", "region"))]
###3.2. Youth unemployment rate (ages 15-24)
youth_unemployment = import("Youth_unemployment_rate(ages 15-24).csv")
names(youth_unemployment)
## [1] "name" "slug" "%"
## [4] "date_of_information" "ranking" "region"
str(youth_unemployment)
## 'data.frame': 203 obs. of 6 variables:
## $ name : chr "Djibouti" "South Africa" "Eswatini" "Libya" ...
## $ slug : chr "djibouti" "south-africa" "eswatini" "libya" ...
## $ % : num 79.9 64.2 50.9 50.5 48.8 45.4 42.3 42.2 41.2 41.1 ...
## $ date_of_information: int 2021 2021 2021 2021 2020 2021 2021 2020 2021 2021 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Africa" "Africa" "Africa" "Africa" ...
youth_unemployment <- youth_unemployment %>%
rename("%_youth" = "%")
youth_unemployment <- youth_unemployment[, !(names(youth_unemployment) %in% c("slug", "date_of_information", "ranking", "region"))]
###3.3. Public debt
public_debt = import("Public debt.csv")
names(public_debt)
## [1] "name" "slug" "% of GDP"
## [4] "date_of_information" "ranking" "region"
str(public_debt)
## 'data.frame': 210 obs. of 6 variables:
## $ name : chr "Greece" "Japan" "United Kingdom" "Singapore" ...
## $ slug : chr "greece" "japan" "united-kingdom" "singapore" ...
## $ % of GDP : num 237 216 185 154 147 ...
## $ date_of_information: chr "2021" "2021" "2021" "2021" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Europe" "East and Southeast Asia" "Europe" "East and Southeast Asia" ...
public_debt <- public_debt[, !(names(public_debt) %in% c("slug", "date_of_information", "ranking", "region"))]
###3.4. Debt external
debt_external = import("Debt_external.csv")
names(debt_external)
## [1] "name" "slug" "value"
## [4] "date_of_information" "ranking" "region"
str(debt_external)
## 'data.frame': 207 obs. of 6 variables:
## $ name : chr "United States" "United Kingdom" "France" "Germany" ...
## $ slug : chr "united-states" "united-kingdom" "france" "germany" ...
## $ value : chr "$20,275,951,000,000" "$8,722,000,000,000" "$6,356,000,000,000" "$5,671,463,000,000" ...
## $ date_of_information: chr "2019" "2019" "2019" "2019" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "Europe" "Europe" "Europe" ...
debt_external$value <- parse_number(debt_external$value)
str(debt_external)
## 'data.frame': 207 obs. of 6 variables:
## $ name : chr "United States" "United Kingdom" "France" "Germany" ...
## $ slug : chr "united-states" "united-kingdom" "france" "germany" ...
## $ value : num 2.03e+13 8.72e+12 6.36e+12 5.67e+12 4.35e+12 ...
## $ date_of_information: chr "2019" "2019" "2019" "2019" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "Europe" "Europe" "Europe" ...
debt_external <- debt_external %>%
rename("value_debt" = value)
debt_external <- debt_external[, !(names(debt_external) %in% c("slug", "date_of_information", "ranking", "region"))]
###MERGE
gaaaa=merge(electricity,debt_external, by.x = "name", by.y = 'name')
gaaaa%>%
rmarkdown::paged_table()
bases_datos <- list(electricity, re_petroleum, carbon_emissions, energy_consumption, telephones_fixed, telephones_mobile, broadband_fixed, inflation_rate, youth_unemployment, public_debt, debt_external)
data_final <- bases_datos[[1]] # La base de datos combinada comienza con la primera base de datos
for (i in 2:length(bases_datos)) {
data_final <- merge(data_final, bases_datos[[i]], by = "name")
}
str(data_final)
## 'data.frame': 180 obs. of 12 variables:
## $ name : chr "Afghanistan" "Albania" "Algeria" "Angola" ...
## $ kW : num 776000 2531000 21694000 7344000 44731000 ...
## $ bbl/day : num 0 5638 627900 53480 669800 ...
## $ metric tonnes of CO2: num 7.89e+06 3.79e+06 1.52e+08 1.94e+07 1.93e+08 ...
## $ Btu/person : num 3227000 38442000 61433000 11693000 79083000 ...
## $ value_telefixed : num 146000 177000 5576000 94000 7615000 ...
## $ value_telemobi : num 22678000 2782000 49019000 23978000 60236000 ...
## $ value_broad : num 26570 508937 3790459 230610 9571562 ...
## $ %_inflation : num 2.3 6.73 9.27 25.75 25.7 ...
## $ %_youth : num 20.2 27.8 31.9 18.5 29.9 36.1 10.8 11.4 16.5 30.8 ...
## $ % of GDP : num 7 82.4 27.5 65 57.6 ...
## $ value_debt : num 2.84e+08 9.31e+09 5.57e+09 4.21e+10 2.79e+11 ...
###Pregunta 1: Análisis Factorial
###Pasos previos
names(data_final)
## [1] "name" "kW" "bbl/day"
## [4] "metric tonnes of CO2" "Btu/person" "value_telefixed"
## [7] "value_telemobi" "value_broad" "%_inflation"
## [10] "%_youth" "% of GDP" "value_debt"
data_final[data_final == 0] <- NA
dontselect=c("name")
select=setdiff(names(data_final),dontselect)
theData=data_final[,select]
# usaremos:
library(magrittr)
head(theData,10)%>%
rmarkdown::paged_table()
###Calculando correlaciones entre todas las variables
library(polycor)
corMatrix=polycor::hetcor(theData)$correlations
round(corMatrix,2)
## kW bbl/day metric tonnes of CO2 Btu/person
## kW 1.00 0.83 0.99 0.08
## bbl/day 0.83 1.00 0.81 0.20
## metric tonnes of CO2 0.99 0.81 1.00 0.09
## Btu/person 0.08 0.20 0.09 1.00
## value_telefixed 0.96 0.80 0.94 0.08
## value_telemobi 0.87 0.61 0.88 -0.06
## value_broad 0.96 0.66 0.96 0.04
## %_inflation -0.02 0.00 -0.02 -0.02
## %_youth -0.09 -0.09 -0.09 -0.22
## % of GDP 0.08 0.13 0.03 0.06
## value_debt 0.48 0.75 0.42 0.23
## value_telefixed value_telemobi value_broad %_inflation
## kW 0.96 0.87 0.96 -0.02
## bbl/day 0.80 0.61 0.66 0.00
## metric tonnes of CO2 0.94 0.88 0.96 -0.02
## Btu/person 0.08 -0.06 0.04 -0.02
## value_telefixed 1.00 0.81 0.93 -0.02
## value_telemobi 0.81 1.00 0.85 -0.03
## value_broad 0.93 0.85 1.00 -0.02
## %_inflation -0.02 -0.03 -0.02 1.00
## %_youth -0.11 -0.04 -0.10 -0.03
## % of GDP 0.19 -0.03 0.04 -0.06
## value_debt 0.53 0.19 0.30 -0.03
## %_youth % of GDP value_debt
## kW -0.09 0.08 0.48
## bbl/day -0.09 0.13 0.75
## metric tonnes of CO2 -0.09 0.03 0.42
## Btu/person -0.22 0.06 0.23
## value_telefixed -0.11 0.19 0.53
## value_telemobi -0.04 -0.03 0.19
## value_broad -0.10 0.04 0.30
## %_inflation -0.03 -0.06 -0.03
## %_youth 1.00 0.20 -0.14
## % of GDP 0.20 1.00 0.36
## value_debt -0.14 0.36 1.00
###Graficamos las correlaciones
library(ggcorrplot)
## Loading required package: ggplot2
ggcorrplot(corMatrix)
###AF
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:polycor':
##
## polyserial
psych::KMO(corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA = 0.75
## MSA for each item =
## kW bbl/day metric tonnes of CO2
## 0.83 0.68 0.79
## Btu/person value_telefixed value_telemobi
## 0.43 0.88 0.72
## value_broad %_inflation %_youth
## 0.68 0.25 0.42
## % of GDP value_debt
## 0.48 0.77
#Verificar si la matriz de correlación es adecuada
cortest.bartlett(corMatrix,n=nrow(theData))$p.value>0.05
## [1] FALSE
library(matrixcalc)
is.singular.matrix(corMatrix)
## [1] FALSE
#Determinar en cuantos factores o variables latentes podrÃamos redimensionar la data
fa.parallel(theData, fa = 'fa',correct = T,plot = F)
## Warning in cor.smooth(model): Matrix was not positive definite, smoothing was
## done
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
#Redimensionar a número menor de factores
library(GPArotation)
##
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
resfa <- fa(theData,
nfactors = 3,
cor = 'mixed',
rotate = "varimax",
fm="minres")
## Warning in cor.smooth(model): Matrix was not positive definite, smoothing was
## done
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.1266e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12583e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12583e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12582e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12582e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.1266e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12589e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12584e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12582e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.1266e+09, f=25, theta=6.12581e+09, ..): not converged in 1000000
## iter.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
print(resfa$loadings)
##
## Loadings:
## MR1 MR2 MR3
## kW 0.971 0.246
## bbl/day 0.686 0.589
## metric tonnes of CO2 0.979 0.206
## Btu/person 0.322
## value_telefixed 0.917 0.301 0.141
## value_telemobi 0.903
## value_broad 0.962
## %_inflation
## %_youth -0.169 0.277
## % of GDP 0.213 0.472
## value_debt 0.262 0.941 0.191
##
## MR1 MR2 MR3
## SS loadings 5.027 1.608 0.371
## Proportion Var 0.457 0.146 0.034
## Cumulative Var 0.457 0.603 0.637
print(resfa$loadings,cutoff = 0.5)
##
## Loadings:
## MR1 MR2 MR3
## kW 0.971
## bbl/day 0.686 0.589
## metric tonnes of CO2 0.979
## Btu/person
## value_telefixed 0.917
## value_telemobi 0.903
## value_broad 0.962
## %_inflation
## %_youth
## % of GDP
## value_debt 0.941
##
## MR1 MR2 MR3
## SS loadings 5.027 1.608 0.371
## Proportion Var 0.457 0.146 0.034
## Cumulative Var 0.457 0.603 0.637
fa.diagram(resfa,main = "Resultados del EFA")
#Evaluando resultados
sort(resfa$communality)
## %_inflation %_youth Btu/person
## 0.006663116 0.107622134 0.110143157
## % of GDP value_telemobi bbl/day
## 0.269284456 0.816208782 0.818299104
## value_broad value_telefixed value_debt
## 0.930382235 0.950995839 0.991087793
## metric tonnes of CO2 kW
## 1.001893848 1.003662906
sort(resfa$complexity)
## value_telemobi value_broad %_inflation
## 1.003324 1.009656 1.025048
## metric tonnes of CO2 Btu/person kW
## 1.092190 1.126941 1.128750
## value_debt value_telefixed % of GDP
## 1.241835 1.264896 1.401224
## %_youth bbl/day
## 1.724001 1.957138
resfa$TLI
## [1] -4656113
resfa$rms
## [1] 0.01647071
resfa$RMSEA
## RMSEA lower upper confidence
## 1166.819 NA 1169.999 0.900
resfa$BIC
## [1] 6126595136
###PREGUNTA 2
cor(data_final[,c(2:12)])
## kW bbl/day metric tonnes of CO2 Btu/person
## kW 1.000000000 NA 0.993718658 NA
## bbl/day NA 1 NA NA
## metric tonnes of CO2 0.993718658 NA 1.000000000 NA
## Btu/person NA NA NA 1
## value_telefixed NA NA NA NA
## value_telemobi 0.870539984 NA 0.887093225 NA
## value_broad 0.963448718 NA 0.962391896 NA
## %_inflation -0.003864285 NA -0.007582493 NA
## %_youth -0.074977408 NA -0.071206012 NA
## % of GDP NA NA NA NA
## value_debt NA NA NA NA
## value_telefixed value_telemobi value_broad %_inflation
## kW NA 0.87053998 0.963448718 -0.003864285
## bbl/day NA NA NA NA
## metric tonnes of CO2 NA 0.88709323 0.962391896 -0.007582493
## Btu/person NA NA NA NA
## value_telefixed 1 NA NA NA
## value_telemobi NA 1.00000000 0.849225178 -0.013548883
## value_broad NA 0.84922518 1.000000000 -0.008542596
## %_inflation NA -0.01354888 -0.008542596 1.000000000
## %_youth NA -0.05225389 -0.073855128 -0.026774124
## % of GDP NA NA NA NA
## value_debt NA NA NA NA
## %_youth % of GDP value_debt
## kW -0.07497741 NA NA
## bbl/day NA NA NA
## metric tonnes of CO2 -0.07120601 NA NA
## Btu/person NA NA NA
## value_telefixed NA NA NA
## value_telemobi -0.05225389 NA NA
## value_broad -0.07385513 NA NA
## %_inflation -0.02677412 NA NA
## %_youth 1.00000000 NA NA
## % of GDP NA 1 NA
## value_debt NA NA 1
datos_sin_na <- data_final[complete.cases(data_final), ]
cor(datos_sin_na[,c(2:12)])
## kW bbl/day metric tonnes of CO2 Btu/person
## kW 1.00000000 0.825596828 0.99354664 0.08497303
## bbl/day 0.82559683 1.000000000 0.81267695 0.20415268
## metric tonnes of CO2 0.99354664 0.812676953 1.00000000 0.09206464
## Btu/person 0.08497303 0.204152685 0.09206464 1.00000000
## value_telefixed 0.95980779 0.802058773 0.93905789 0.07880541
## value_telemobi 0.86689704 0.614168876 0.88448189 -0.06377627
## value_broad 0.96298937 0.661855455 0.96173071 0.03776922
## %_inflation -0.01585127 0.002884461 -0.01916433 -0.02269951
## %_youth -0.09299223 -0.092420098 -0.08794022 -0.21993919
## % of GDP 0.07917610 0.132983658 0.03171408 0.05968754
## value_debt 0.47508934 0.754404362 0.42336012 0.22704345
## value_telefixed value_telemobi value_broad %_inflation
## kW 0.95980779 0.86689704 0.96298937 -0.015851267
## bbl/day 0.80205877 0.61416888 0.66185545 0.002884461
## metric tonnes of CO2 0.93905789 0.88448189 0.96173071 -0.019164328
## Btu/person 0.07880541 -0.06377627 0.03776922 -0.022699512
## value_telefixed 1.00000000 0.81146230 0.92614603 -0.023072723
## value_telemobi 0.81146230 1.00000000 0.84695230 -0.027336627
## value_broad 0.92614603 0.84695230 1.00000000 -0.018362244
## %_inflation -0.02307272 -0.02733663 -0.01836224 1.000000000
## %_youth -0.10535423 -0.03628953 -0.09510730 -0.033131537
## % of GDP 0.19067439 -0.02579508 0.04280293 -0.060841078
## value_debt 0.53466241 0.18665655 0.30306443 -0.028830948
## %_youth % of GDP value_debt
## kW -0.09299223 0.07917610 0.47508934
## bbl/day -0.09242010 0.13298366 0.75440436
## metric tonnes of CO2 -0.08794022 0.03171408 0.42336012
## Btu/person -0.21993919 0.05968754 0.22704345
## value_telefixed -0.10535423 0.19067439 0.53466241
## value_telemobi -0.03628953 -0.02579508 0.18665655
## value_broad -0.09510730 0.04280293 0.30306443
## %_inflation -0.03313154 -0.06084108 -0.02883095
## %_youth 1.00000000 0.19973133 -0.14432389
## % of GDP 0.19973133 1.00000000 0.35874968
## value_debt -0.14432389 0.35874968 1.00000000
dataClus=datos_sin_na[,c(2:12)]
row.names(dataClus)=datos_sin_na$name
library(cluster)
g.dist = daisy(dataClus, metric="gower")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_nbclust(dataClus, hcut,diss=g.dist,method = "gap_stat",k.max = 10,verbose = F,hc_func = "agnes")
###Pregunta 3
names(data_final)
## [1] "name" "kW" "bbl/day"
## [4] "metric tonnes of CO2" "Btu/person" "value_telefixed"
## [7] "value_telemobi" "value_broad" "%_inflation"
## [10] "%_youth" "% of GDP" "value_debt"