# Broadband subscriptions: read the CSV, turn the comma-formatted counts into
# numbers, and keep only the country name plus the renamed value column.
library(rio)
broadband <- import("broadband.csv")
str(broadband)
## 'data.frame': 214 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Brazil" ...
## $ slug : chr "china" "united-states" "japan" "brazil" ...
## $ value : chr "483,549,500" "121,176,000" "44,000,791" "36,344,670" ...
## $ date_of_information: chr "2020" "2020" "2020" "2020" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "South America" ...
library(readr)
# "value" is read as text with thousands separators (e.g. "483,549,500").
broadband[["value"]] <- parse_number(broadband[["value"]])
str(broadband)
## 'data.frame': 214 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Brazil" ...
## $ slug : chr "china" "united-states" "japan" "brazil" ...
## $ value : num 4.84e+08 1.21e+08 4.40e+07 3.63e+07 3.62e+07 ...
## $ date_of_information: chr "2020" "2020" "2020" "2020" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "South America" ...
# Number of countries per region.
table(broadband[["region"]])
##
## Africa Australia and Oceania
## 53 17
## Central America and the Caribbean Central Asia
## 27 6
## East and Southeast Asia Europe
## 19 47
## Middle East North America
## 19 5
## South America South Asia
## 13 8
names(broadband) <- replace(names(broadband), names(broadband) == "value", "bband")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
broadband <- broadband %>% select(name, bband)
# Carbon emissions — unit: metric tonnes of CO2
# Carbon emissions: read the CSV, give the value column a syntactic name,
# convert it to numeric, and keep only name + carbon_emi.
carboncito <- import("carbon_emissions.csv")
str(carboncito)
## 'data.frame': 218 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Russia" ...
## $ slug : chr "china" "united-states" "india" "russia" ...
## $ metric tonnes of CO2: chr "10,773,248,000" "5,144,361,000" "2,314,738,000" "1,848,070,000" ...
## $ date_of_information : int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "Central Asia" ...
names(carboncito) <- replace(
  names(carboncito),
  names(carboncito) == "metric tonnes of CO2",
  "metrics"
)
carboncito[["metrics"]] <- parse_number(carboncito[["metrics"]])
str(carboncito)
## 'data.frame': 218 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Russia" ...
## $ slug : chr "china" "united-states" "india" "russia" ...
## $ metrics : num 1.08e+10 5.14e+09 2.31e+09 1.85e+09 1.10e+09 ...
## $ date_of_information: int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "Central Asia" ...
names(carboncito) <- replace(names(carboncito), names(carboncito) == "metrics", "carbon_emi")
library(dplyr)
carboncito <- carboncito %>% select(name, carbon_emi)
# External debt: read the CSV, convert the currency-formatted values to
# numeric, and keep only name + externa_deuda.
pafuera <- import("debt_external.csv")
str(pafuera)
## 'data.frame': 207 obs. of 6 variables:
## $ name : chr "United States" "United Kingdom" "France" "Germany" ...
## $ slug : chr "united-states" "united-kingdom" "france" "germany" ...
## $ value : chr "$20,275,951,000,000" "$8,722,000,000,000" "$6,356,000,000,000" "$5,671,463,000,000" ...
## $ date_of_information: chr "2019" "2019" "2019" "2019" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "Europe" "Europe" "Europe" ...
# Values carry a leading "$" and thousands separators; both are gone after parsing.
pafuera[["value"]] <- parse_number(pafuera[["value"]])
str(pafuera)
## 'data.frame': 207 obs. of 6 variables:
## $ name : chr "United States" "United Kingdom" "France" "Germany" ...
## $ slug : chr "united-states" "united-kingdom" "france" "germany" ...
## $ value : num 2.03e+13 8.72e+12 6.36e+12 5.67e+12 4.35e+12 ...
## $ date_of_information: chr "2019" "2019" "2019" "2019" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "Europe" "Europe" "Europe" ...
names(pafuera) <- replace(names(pafuera), names(pafuera) == "value", "externa_deuda")
library(dplyr)
pafuera <- pafuera %>% select(name, externa_deuda)
# Installed electricity capacity — unit: kW
# Electricity (kW): read the CSV, convert the value column to numeric, and
# keep only name + kw_electricity.
pikachu <- import("electricity.csv")
str(pikachu)
## 'data.frame': 213 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Japan" ...
## $ slug : chr "china" "united-states" "india" "japan" ...
## $ kW : chr "2,217,925,000" "1,143,266,000" "432,768,000" "348,666,000" ...
## $ date_of_information: int 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "East and Southeast Asia" ...
names(pikachu) <- replace(names(pikachu), names(pikachu) == "kW", "kw")
pikachu[["kw"]] <- parse_number(pikachu[["kw"]])
str(pikachu)
## 'data.frame': 213 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Japan" ...
## $ slug : chr "china" "united-states" "india" "japan" ...
## $ kw : num 2.22e+09 1.14e+09 4.33e+08 3.49e+08 2.76e+08 ...
## $ date_of_information: int 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "East and Southeast Asia" ...
names(pikachu) <- replace(names(pikachu), names(pikachu) == "kw", "kw_electricity")
library(dplyr)
pikachu <- pikachu %>% select(name, kw_electricity)
# Energy consumption per capita — unit: Btu/person
# Energy consumption per capita: read the CSV, rename the non-syntactic
# "Btu/person" column, convert it to numeric, and keep only name + btuperson.
powerade <- import("energy_compercap.csv")
str(powerade)
## 'data.frame': 212 obs. of 6 variables:
## $ name : chr "Qatar" "Singapore" "Bahrain" "United Arab Emirates" ...
## $ slug : chr "qatar" "singapore" "bahrain" "united-arab-emirates" ...
## $ Btu/person : chr "723,582,000" "639,951,000" "547,976,000" "471,788,000" ...
## $ date_of_information: int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Middle East" "East and Southeast Asia" "Middle East" "Middle East" ...
colnames(powerade)
## [1] "name" "slug" "Btu/person"
## [4] "date_of_information" "ranking" "region"
library(dplyr)
# Backticks are required because the column name contains "/".
powerade <- rename(powerade, btu = `Btu/person`)
powerade[["btu"]] <- parse_number(powerade[["btu"]])
str(powerade)
## 'data.frame': 212 obs. of 6 variables:
## $ name : chr "Qatar" "Singapore" "Bahrain" "United Arab Emirates" ...
## $ slug : chr "qatar" "singapore" "bahrain" "united-arab-emirates" ...
## $ btu : num 7.24e+08 6.40e+08 5.48e+08 4.72e+08 4.15e+08 ...
## $ date_of_information: int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Middle East" "East and Southeast Asia" "Middle East" "Middle East" ...
names(powerade) <- replace(names(powerade), names(powerade) == "btu", "btuperson")
powerade <- powerade %>% select(name, btuperson)
# Inflation rate — unit: %
# Inflation: read the CSV, rename the "%" column, clean and convert it to
# numeric, and keep only name + infla_percent.
alangarcia <- import("inflation.csv")
str(alangarcia)
## 'data.frame': 221 obs. of 6 variables:
## $ name : chr "South Sudan" "Andorra" "Dominica" "American Samoa" ...
## $ slug : chr "south-sudan" "andorra" "dominica" "american-samoa" ...
## $ % : chr "-6.69" "-0.9" "-0.73" "-0.5" ...
## $ date_of_information: chr "2022" "2015" "2020" "2015" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Africa" "Europe" "Central America and the Caribbean" "Australia and Oceania" ...
alangarcia <- rename(alangarcia, percentage = `%`)
# Before parsing, drop commas that have no digit on either side and dots that
# are not followed by a digit.
alangarcia[["percentage"]] <- readr::parse_number(
  gsub("(?<!\\d),(?!\\d)|\\.(?!\\d)", "", alangarcia[["percentage"]], perl = TRUE)
)
str(alangarcia)
## 'data.frame': 221 obs. of 6 variables:
## $ name : chr "South Sudan" "Andorra" "Dominica" "American Samoa" ...
## $ slug : chr "south-sudan" "andorra" "dominica" "american-samoa" ...
## $ percentage : num -6.69 -0.9 -0.73 -0.5 -0.4 -0.3 0 0 0.3 0.3 ...
## $ date_of_information: chr "2022" "2015" "2020" "2015" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Africa" "Europe" "Central America and the Caribbean" "Australia and Oceania" ...
names(alangarcia) <- replace(names(alangarcia), names(alangarcia) == "percentage", "infla_percent")
library(dplyr)
alangarcia <- alangarcia %>% select(name, infla_percent)
# Mobile cellular subscriptions: read the CSV, convert the value column to
# numeric, and keep only name + mobile_cellular.
perapad <- import("mobile_celular.csv")
str(perapad)
## 'data.frame': 225 obs. of 6 variables:
## $ name : chr "China" "India" "United States" "Indonesia" ...
## $ slug : chr "china" "india" "united-states" "indonesia" ...
## $ value : chr "1,781,000,000" "1,143,000,000" "372,682,000" "316,553,000" ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "South Asia" "North America" "East and Southeast Asia" ...
perapad[["value"]] <- parse_number(perapad[["value"]])
str(perapad)
## 'data.frame': 225 obs. of 6 variables:
## $ name : chr "China" "India" "United States" "Indonesia" ...
## $ slug : chr "china" "india" "united-states" "indonesia" ...
## $ value : num 1.78e+09 1.14e+09 3.73e+08 3.17e+08 2.45e+08 ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "South Asia" "North America" "East and Southeast Asia" ...
names(perapad) <- replace(names(perapad), names(perapad) == "value", "mobile_cellular")
library(dplyr)
perapad <- perapad %>% select(name, mobile_cellular)
# Petroleum production — unit: bbl/day
# Petroleum production: read the CSV, rename the non-syntactic "bbl/day"
# column, convert it to numeric, and keep only name + barrels_pd.
derrame <- import("petroleum_production.csv")
str(derrame)
## 'data.frame': 216 obs. of 6 variables:
## $ name : chr "United States" "China" "Russia" "India" ...
## $ slug : chr "united-states" "china" "russia" "india" ...
## $ bbl/day : chr "20,300,000" "11,510,000" "6,076,000" "4,897,000" ...
## $ date_of_information: int 2017 2015 2015 2015 2017 2017 2015 2015 2017 2017 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "East and Southeast Asia" "Central Asia" "South Asia" ...
derrame <- rename(derrame, bbl = `bbl/day`)
derrame[["bbl"]] <- parse_number(derrame[["bbl"]])
str(derrame)
## 'data.frame': 216 obs. of 6 variables:
## $ name : chr "United States" "China" "Russia" "India" ...
## $ slug : chr "united-states" "china" "russia" "india" ...
## $ bbl : num 20300000 11510000 6076000 4897000 3467000 ...
## $ date_of_information: int 2017 2015 2015 2015 2017 2017 2015 2015 2017 2017 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "East and Southeast Asia" "Central Asia" "South Asia" ...
names(derrame) <- replace(names(derrame), names(derrame) == "bbl", "barrels_pd")
library(dplyr)
derrame <- derrame %>% select(name, barrels_pd)
# Public debt — unit: % of GDP
# Public debt (% of GDP): read the CSV, rename the value column, and keep only
# name + publicdebt_percent. The column is already numeric on import.
morosos <- import("public_debt.csv")
str(morosos)
## 'data.frame': 210 obs. of 6 variables:
## $ name : chr "Greece" "Japan" "United Kingdom" "Singapore" ...
## $ slug : chr "greece" "japan" "united-kingdom" "singapore" ...
## $ % of GDP : num 237 216 185 154 147 ...
## $ date_of_information: chr "2021" "2021" "2021" "2021" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Europe" "East and Southeast Asia" "Europe" "East and Southeast Asia" ...
library(dplyr)
# Single rename straight to the final name; backticks because of "%" and spaces.
morosos <- rename(morosos, publicdebt_percent = `% of GDP`)
morosos <- morosos %>% select(name, publicdebt_percent)
# Fixed telephone lines: read the CSV, convert the value column to numeric,
# and keep only name + fixedlines.
cablemagico <- import("telephone_fixedlines.csv")
str(cablemagico)
## 'data.frame': 224 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Germany" ...
## $ slug : chr "china" "united-states" "japan" "germany" ...
## $ value : chr "179,414,000" "91,623,000" "60,721,000" "38,580,000" ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "Europe" ...
cablemagico[["value"]] <- parse_number(cablemagico[["value"]])
str(cablemagico)
## 'data.frame': 224 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Germany" ...
## $ slug : chr "china" "united-states" "japan" "germany" ...
## $ value : num 1.79e+08 9.16e+07 6.07e+07 3.86e+07 3.77e+07 ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "Europe" ...
names(cablemagico) <- replace(names(cablemagico), names(cablemagico) == "value", "fixedlines")
library(dplyr)
cablemagico <- cablemagico %>% select(name, fixedlines)
# Youth unemployment — unit: %
# Youth unemployment: read the CSV, rename the "%" column, and keep only
# name + unemployed_pc. The column is already numeric on import.
otarola <- import("youth_unemployed.csv")
otarola <- rename(otarola, percent = `%`)
str(otarola)
## 'data.frame': 203 obs. of 6 variables:
## $ name : chr "Djibouti" "South Africa" "Eswatini" "Libya" ...
## $ slug : chr "djibouti" "south-africa" "eswatini" "libya" ...
## $ percent : num 79.9 64.2 50.9 50.5 48.8 45.4 42.3 42.2 41.2 41.1 ...
## $ date_of_information: int 2021 2021 2021 2021 2020 2021 2021 2020 2021 2021 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Africa" "Africa" "Africa" "Africa" ...
names(otarola) <- replace(names(otarola), names(otarola) == "percent", "unemployed_pc")
library(dplyr)
otarola <- otarola %>% select(name, unemployed_pc)
# Merge the eleven per-indicator tables into one wide table keyed by country
# name. full_join keeps a country even when some indicators lack it.
library(dplyr)
library(purrr)
dataza <- list(
  alangarcia, broadband, cablemagico, carboncito, derrame, morosos,
  otarola, pafuera, perapad, pikachu, powerade
) %>%
  reduce(full_join, by = "name")
# One row per country is expected: every count below should be 1.
table(dataza[["name"]])
##
## Afghanistan
## 1
## Albania
## 1
## Algeria
## 1
## American Samoa
## 1
## Andorra
## 1
## Angola
## 1
## Anguilla
## 1
## Antarctica
## 1
## Antigua and Barbuda
## 1
## Argentina
## 1
## Armenia
## 1
## Aruba
## 1
## Australia
## 1
## Austria
## 1
## Azerbaijan
## 1
## Bahamas, The
## 1
## Bahrain
## 1
## Bangladesh
## 1
## Barbados
## 1
## Belarus
## 1
## Belgium
## 1
## Belize
## 1
## Benin
## 1
## Bermuda
## 1
## Bhutan
## 1
## Bolivia
## 1
## Bosnia and Herzegovina
## 1
## Botswana
## 1
## Brazil
## 1
## British Virgin Islands
## 1
## Brunei
## 1
## Bulgaria
## 1
## Burkina Faso
## 1
## Burma
## 1
## Burundi
## 1
## Cabo Verde
## 1
## Cambodia
## 1
## Cameroon
## 1
## Canada
## 1
## Cayman Islands
## 1
## Central African Republic
## 1
## Chad
## 1
## Chile
## 1
## China
## 1
## Colombia
## 1
## Comoros
## 1
## Congo, Democratic Republic of the
## 1
## Congo, Republic of the
## 1
## Cook Islands
## 1
## Costa Rica
## 1
## Cote d'Ivoire
## 1
## Croatia
## 1
## Cuba
## 1
## Curacao
## 1
## Cyprus
## 1
## Czechia
## 1
## Denmark
## 1
## Djibouti
## 1
## Dominica
## 1
## Dominican Republic
## 1
## Ecuador
## 1
## Egypt
## 1
## El Salvador
## 1
## Equatorial Guinea
## 1
## Eritrea
## 1
## Estonia
## 1
## Eswatini
## 1
## Ethiopia
## 1
## Falkland Islands (Islas Malvinas)
## 1
## Faroe Islands
## 1
## Fiji
## 1
## Finland
## 1
## France
## 1
## French Polynesia
## 1
## Gabon
## 1
## Gambia, The
## 1
## Gaza Strip
## 1
## Georgia
## 1
## Germany
## 1
## Ghana
## 1
## Gibraltar
## 1
## Greece
## 1
## Greenland
## 1
## Grenada
## 1
## Guam
## 1
## Guatemala
## 1
## Guernsey
## 1
## Guinea
## 1
## Guinea-Bissau
## 1
## Guyana
## 1
## Haiti
## 1
## Honduras
## 1
## Hong Kong
## 1
## Hungary
## 1
## Iceland
## 1
## India
## 1
## Indonesia
## 1
## Iran
## 1
## Iraq
## 1
## Ireland
## 1
## Isle of Man
## 1
## Israel
## 1
## Italy
## 1
## Jamaica
## 1
## Japan
## 1
## Jersey
## 1
## Jordan
## 1
## Kazakhstan
## 1
## Kenya
## 1
## Kiribati
## 1
## Korea, North
## 1
## Korea, South
## 1
## Kosovo
## 1
## Kuwait
## 1
## Kyrgyzstan
## 1
## Laos
## 1
## Latvia
## 1
## Lebanon
## 1
## Lesotho
## 1
## Liberia
## 1
## Libya
## 1
## Liechtenstein
## 1
## Lithuania
## 1
## Luxembourg
## 1
## Macau
## 1
## Madagascar
## 1
## Malawi
## 1
## Malaysia
## 1
## Maldives
## 1
## Mali
## 1
## Malta
## 1
## Marshall Islands
## 1
## Mauritania
## 1
## Mauritius
## 1
## Mexico
## 1
## Micronesia, Federated States of
## 1
## Moldova
## 1
## Monaco
## 1
## Mongolia
## 1
## Montenegro
## 1
## Montserrat
## 1
## Morocco
## 1
## Mozambique
## 1
## Namibia
## 1
## Nauru
## 1
## Nepal
## 1
## Netherlands
## 1
## New Caledonia
## 1
## New Zealand
## 1
## Nicaragua
## 1
## Niger
## 1
## Nigeria
## 1
## Niue
## 1
## North Macedonia
## 1
## Northern Mariana Islands
## 1
## Norway
## 1
## Oman
## 1
## Pakistan
## 1
## Palau
## 1
## Panama
## 1
## Papua New Guinea
## 1
## Paraguay
## 1
## Peru
## 1
## Philippines
## 1
## Poland
## 1
## Portugal
## 1
## Puerto Rico
## 1
## Qatar
## 1
## Romania
## 1
## Russia
## 1
## Rwanda
## 1
## Saint Helena, Ascension, and Tristan da Cunha
## 1
## Saint Kitts and Nevis
## 1
## Saint Lucia
## 1
## Saint Martin
## 1
## Saint Pierre and Miquelon
## 1
## Saint Vincent and the Grenadines
## 1
## Samoa
## 1
## San Marino
## 1
## Sao Tome and Principe
## 1
## Saudi Arabia
## 1
## Senegal
## 1
## Serbia
## 1
## Seychelles
## 1
## Sierra Leone
## 1
## Singapore
## 1
## Sint Maarten
## 1
## Slovakia
## 1
## Slovenia
## 1
## Solomon Islands
## 1
## Somalia
## 1
## South Africa
## 1
## South Sudan
## 1
## Spain
## 1
## Sri Lanka
## 1
## Sudan
## 1
## Suriname
## 1
## Sweden
## 1
## Switzerland
## 1
## Syria
## 1
## Taiwan
## 1
## Tajikistan
## 1
## Tanzania
## 1
## Thailand
## 1
## Timor-Leste
## 1
## Togo
## 1
## Tokelau
## 1
## Tonga
## 1
## Trinidad and Tobago
## 1
## Tunisia
## 1
## Turkey (Turkiye)
## 1
## Turkmenistan
## 1
## Turks and Caicos Islands
## 1
## Tuvalu
## 1
## Uganda
## 1
## Ukraine
## 1
## United Arab Emirates
## 1
## United Kingdom
## 1
## United States
## 1
## Uruguay
## 1
## Uzbekistan
## 1
## Vanuatu
## 1
## Venezuela
## 1
## Vietnam
## 1
## Virgin Islands
## 1
## Wake Island
## 1
## Wallis and Futuna
## 1
## West Bank
## 1
## Yemen
## 1
## Zambia
## 1
## Zimbabwe
## 1
# Build the numeric-only table for the factor analysis: every column except
# the country name, restricted to rows with no missing values.
dontselect <- c("name")
select <- setdiff(colnames(dataza), dontselect)
dataf <- dataza[complete.cases(dataza[, select]), select]
# we will use:
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
# Show the first ten rows as a paged table.
rmarkdown::paged_table(head(dataf, n = 10))
library(polycor)
# Correlation matrix of all indicators, printed rounded to two decimals.
corMatrix <- polycor::hetcor(dataf)[["correlations"]]
round(corMatrix, digits = 2)
## infla_percent bband fixedlines carbon_emi barrels_pd
## infla_percent 1.00 -0.01 -0.01 -0.01 0.02
## bband -0.01 1.00 0.93 0.96 0.67
## fixedlines -0.01 0.93 1.00 0.94 0.81
## carbon_emi -0.01 0.96 0.94 1.00 0.82
## barrels_pd 0.02 0.67 0.81 0.82 1.00
## publicdebt_percent -0.04 0.06 0.19 0.05 0.14
## unemployed_pc -0.03 -0.07 -0.09 -0.07 -0.08
## externa_deuda -0.02 0.32 0.55 0.44 0.75
## mobile_cellular -0.01 0.85 0.82 0.89 0.63
## kw_electricity 0.00 0.96 0.96 0.99 0.83
## btuperson 0.00 0.07 0.13 0.13 0.24
## publicdebt_percent unemployed_pc externa_deuda
## infla_percent -0.04 -0.03 -0.02
## bband 0.06 -0.07 0.32
## fixedlines 0.19 -0.09 0.55
## carbon_emi 0.05 -0.07 0.44
## barrels_pd 0.14 -0.08 0.75
## publicdebt_percent 1.00 0.10 0.30
## unemployed_pc 0.10 1.00 -0.11
## externa_deuda 0.30 -0.11 1.00
## mobile_cellular 0.01 -0.05 0.21
## kw_electricity 0.09 -0.07 0.49
## btuperson 0.08 -0.14 0.28
## mobile_cellular kw_electricity btuperson
## infla_percent -0.01 0.00 0.00
## bband 0.85 0.96 0.07
## fixedlines 0.82 0.96 0.13
## carbon_emi 0.89 0.99 0.13
## barrels_pd 0.63 0.83 0.24
## publicdebt_percent 0.01 0.09 0.08
## unemployed_pc -0.05 -0.07 -0.14
## externa_deuda 0.21 0.49 0.28
## mobile_cellular 1.00 0.87 0.00
## kw_electricity 0.87 1.00 0.12
## btuperson 0.00 0.12 1.00
# Visualise the correlation matrix.
library(ggcorrplot)
## Loading required package: ggplot2
ggcorrplot(corr = corMatrix)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:polycor':
##
## polyserial
# Kaiser-Meyer-Olkin sampling adequacy, overall and per variable.
psych::KMO(r = corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA = 0.76
## MSA for each item =
## infla_percent bband fixedlines carbon_emi
## 0.11 0.69 0.87 0.79
## barrels_pd publicdebt_percent unemployed_pc externa_deuda
## 0.69 0.63 0.51 0.80
## mobile_cellular kw_electricity btuperson
## 0.73 0.83 0.58
# Bartlett's test on the correlation matrix. TRUE here would mean p > 0.05,
# i.e. no evidence that the variables are correlated enough to factor.
cortest.bartlett(corMatrix, n = nrow(dataf))$p.value > 0.05
## [1] FALSE
library(matrixcalc)
# TRUE here would mean the correlation matrix is singular.
is.singular.matrix(corMatrix)
## [1] FALSE
# Parallel analysis compares the observed eigenvalues with those of random
# data, so fix the seed to make the suggested number of factors reproducible.
set.seed(123)
# TRUE/FALSE instead of T/F: the short forms are ordinary variables that can
# be reassigned.
fa.parallel(dataf, fa = "fa", correct = TRUE, plot = FALSE)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
library(GPArotation)
##
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
# Exploratory factor analysis on dataf: 3 factors, "mixed" correlations,
# varimax rotation, minimum-residual ("minres") extraction.
# NOTE(review): the parallel analysis output earlier in this script suggests
# 2 factors, and this call emits an ultra-Heywood warning — confirm that
# nfactors = 3 is intended.
resfa <- fa(dataf,
nfactors = 3,
cor = 'mixed',
rotate = "varimax", # open question left by the author: use "oblimin" instead?
fm="minres")
## Warning in cor.smooth(model): Matrix was not positive definite, smoothing was
## done
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27523e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27445e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27445e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27445e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27523e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27451e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27446e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=6.27523e+09, f=25, theta=6.27444e+09, ..): not converged in 1000000
## iter.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
# Factor loadings per variable, with the variance summary.
print(resfa[["loadings"]])
##
## Loadings:
## MR1 MR2 MR3
## infla_percent
## bband 0.963
## fixedlines 0.914 0.314 0.127
## carbon_emi 0.978 0.204
## barrels_pd 0.693 0.606
## publicdebt_percent 0.244 0.439
## unemployed_pc -0.156 0.312
## externa_deuda 0.268 0.905 0.154
## mobile_cellular 0.905
## kw_electricity 0.970 0.248
## btuperson 0.342 -0.109
##
## MR1 MR2 MR3
## SS loadings 5.037 1.593 0.352
## Proportion Var 0.458 0.145 0.032
## Cumulative Var 0.458 0.603 0.635
# Path diagram of the factor solution.
fa.diagram(resfa, main = "Resultados del EFA")
# Communalities in ascending order: the lowest values are the variables the
# factors explain least.
sort(resfa[["communality"]])
## infla_percent unemployed_pc btuperson publicdebt_percent
## 0.006790845 0.123854086 0.130198302 0.253839674
## mobile_cellular barrels_pd externa_deuda bband
## 0.820752389 0.846802219 0.914264142 0.930929257
## fixedlines carbon_emi kw_electricity
## 0.951030807 1.000766881 1.002567929
# Variables kept for clustering: bband, kw_electricity, externa_deuda
# Clustering input: rows complete across all indicators, reduced to the
# country name and the three clustering variables.
library(dplyr)
dataza1 <- dataza[complete.cases(dataza), ] %>%
  select(name, bband, kw_electricity, externa_deuda)
library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following objects are masked from 'package:dplyr':
##
## coalesce, collapse, symdiff
## The following object is masked from 'package:base':
##
## isFALSE
# Standardize the three numeric columns (positions 2 to 4) in place, then copy
# them into dataClus with country names as row names.
dataza1[, 2:4] <- normalize(dataza1[, 2:4], method = "standardize")
dataClus <- dataza1[, 2:4]
rownames(dataClus) <- dataza1[["name"]]
library(cluster)
# Pairwise Gower dissimilarities between countries.
g.dist <- daisy(dataClus, metric = "gower")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Seed BEFORE the gap statistic: it relies on bootstrap reference samples, so
# seeding only afterwards left the suggested number of clusters unreproducible.
set.seed(123)
fviz_nbclust(dataClus, hcut,
  diss = g.dist, method = "gap_stat", k.max = 10,
  verbose = FALSE, hc_func = "agnes"
)
# Re-seed at the original position so code further down starts from the same
# random-number state as before.
set.seed(123)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Agglomerative hierarchical clustering (AGNES, Ward linkage) cut into 5
# groups; the cluster label is stored next to each country.
res.agnes <- hcut(g.dist, k = 5, hc_func = "agnes", hc_method = "ward.D")
dataClus[["agnes"]] <- res.agnes[["cluster"]]
# preview
kable_styling(kbl(head(dataClus, n = 15)))
| | bband | kw_electricity | externa_deuda | agnes |
|---|---|---|---|---|
| Timor-Leste | -0.1809794 | -0.2206631 | -0.2654315 | 1 |
| Comoros | -0.1809530 | -0.2219623 | -0.2654913 | 1 |
| Macau | -0.1754379 | -0.2196508 | -0.2655982 | 1 |
| Benin | -0.1801824 | -0.2196665 | -0.2640971 | 1 |
| New Caledonia | -0.1795156 | -0.2165567 | -0.2655383 | 1 |
| Somalia | -0.1778099 | -0.2216701 | -0.2627608 | 1 |
| Bolivia | -0.1561445 | -0.2021401 | -0.2587402 | 1 |
| Puerto Rico | -0.1630908 | -0.1898992 | -0.2351789 | 1 |
| Hong Kong | -0.1040766 | -0.1482198 | 0.6168992 | 1 |
| Bermuda | -0.1803684 | -0.2212475 | -0.2642518 | 1 |
| China | 12.7062736 | 11.3504475 | 0.8200915 | 2 |
| Bosnia and Herzegovina | -0.1604486 | -0.1972301 | -0.2597788 | 1 |
| Kiribati | -0.1809765 | -0.2220875 | -0.2655763 | 1 |
| Afghanistan | -0.1802733 | -0.2180960 | -0.2654462 | 1 |
| Maldives | -0.1792841 | -0.2193013 | -0.2651438 | 1 |
# Silhouette widths per country, taken from the AGNES result.
silAGNES <- data.frame(res.agnes$silinfo$widths)
silAGNES$country <- row.names(silAGNES)

# Countries with a negative silhouette width (poorly clustered), sorted
# alphabetically.
poorAGNES <- sort(silAGNES[silAGNES$sil_width < 0, "country"])
poorAGNES
## [1] "Switzerland"
# Silhouette plot for the AGNES solution, without the printed summary.
fviz_silhouette(res.agnes, print.summary = FALSE)

# Regression data: only the rows of `dataza` without any missing value.
datareg <- dataza[complete.cases(dataza), ]

# Model 1: public debt (percent) explained by the connectivity, energy and
# emissions indicators.
hp1 <- publicdebt_percent ~ bband + fixedlines + carbon_emi + barrels_pd +
  mobile_cellular + kw_electricity + btuperson
reg_gauss <- lm(formula = hp1, data = datareg)
summary(reg_gauss)
##
## Call:
## lm(formula = hp1, data = datareg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -71.591 -21.396 -4.417 14.348 175.262
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.286e+01 3.515e+00 15.040 < 2e-16 ***
## bband -5.609e-07 7.054e-07 -0.795 0.42760
## fixedlines 2.126e-06 6.689e-07 3.178 0.00176 **
## carbon_emi -6.824e-08 3.576e-08 -1.908 0.05802 .
## barrels_pd -3.192e-06 7.091e-06 -0.450 0.65324
## mobile_cellular -2.168e-08 4.804e-08 -0.451 0.65243
## kw_electricity 3.113e-07 1.991e-07 1.564 0.11974
## btuperson 1.480e-08 2.367e-08 0.625 0.53272
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34.17 on 172 degrees of freedom
## Multiple R-squared: 0.1848, Adjusted R-squared: 0.1516
## F-statistic: 5.568 on 7 and 172 DF, p-value: 8.502e-06
# Model 2: external debt explained by the same set of predictors.
# Note: `reg_gauss` is reused, so the previous fit is overwritten.
hp2 <- externa_deuda ~ bband + fixedlines + carbon_emi + barrels_pd +
  mobile_cellular + kw_electricity + btuperson
reg_gauss <- lm(formula = hp2, data = datareg)
summary(reg_gauss)
##
## Call:
## lm(formula = hp2, data = datareg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.089e+12 -1.956e+11 -6.946e+10 3.159e+09 6.394e+12
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.165e+10 9.251e+10 0.450 0.653131
## bband -4.343e+04 1.857e+04 -2.339 0.020464 *
## fixedlines 5.718e+04 1.760e+04 3.248 0.001398 **
## carbon_emi -3.699e+03 9.412e+02 -3.930 0.000123 ***
## barrels_pd 5.459e+05 1.866e+05 2.925 0.003910 **
## mobile_cellular -4.335e+03 1.264e+03 -3.429 0.000759 ***
## kw_electricity 2.442e+04 5.240e+03 4.660 6.31e-06 ***
## btuperson 1.068e+03 6.231e+02 1.715 0.088221 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.994e+11 on 172 degrees of freedom
## Multiple R-squared: 0.7772, Adjusted R-squared: 0.7681
## F-statistic: 85.72 on 7 and 172 DF, p-value: < 2.2e-16
# Model 3: inflation (percent) explained by the same set of predictors.
# Note: `reg_gauss` is reused, so the previous fit is overwritten.
hp3 <- infla_percent ~ bband + fixedlines + carbon_emi + barrels_pd +
  mobile_cellular + kw_electricity + btuperson
reg_gauss <- lm(formula = hp3, data = datareg)
summary(reg_gauss)
##
## Call:
## lm(formula = hp3, data = datareg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6552 -885 -737 -685 144275
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.337e+02 1.139e+03 0.644 0.520
## bband 1.782e-04 2.285e-04 0.780 0.437
## fixedlines -1.511e-04 2.167e-04 -0.697 0.487
## carbon_emi -9.631e-06 1.158e-05 -0.831 0.407
## barrels_pd 2.049e-03 2.297e-03 0.892 0.374
## mobile_cellular 8.334e-06 1.556e-05 0.536 0.593
## kw_electricity 1.681e-06 6.450e-05 0.026 0.979
## btuperson 8.727e-08 7.669e-06 0.011 0.991
##
## Residual standard error: 11070 on 172 degrees of freedom
## Multiple R-squared: 0.006893, Adjusted R-squared: -0.03352
## F-statistic: 0.1705 on 7 and 172 DF, p-value: 0.9908
# Model 4: unemployment (`unemployed_pc`) explained by the same set of
# predictors.
# Note: `reg_gauss` is reused, so the previous fit is overwritten.
hp4 <- unemployed_pc ~ bband + fixedlines + carbon_emi + barrels_pd +
  mobile_cellular + kw_electricity + btuperson
reg_gauss <- lm(formula = hp4, data = datareg)
summary(reg_gauss)
##
## Call:
## lm(formula = hp4, data = datareg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.963 -9.271 -2.473 7.929 58.864
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.117e+01 1.292e+00 16.394 <2e-16 ***
## bband -2.409e-07 2.592e-07 -0.929 0.3540
## fixedlines 6.035e-11 2.458e-07 0.000 0.9998
## carbon_emi 9.674e-09 1.314e-08 0.736 0.4627
## barrels_pd -2.153e-06 2.606e-06 -0.826 0.4099
## mobile_cellular -1.286e-08 1.766e-08 -0.728 0.4675
## kw_electricity 2.285e-08 7.317e-08 0.312 0.7552
## btuperson -1.526e-08 8.700e-09 -1.755 0.0811 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.56 on 172 degrees of freedom
## Multiple R-squared: 0.02942, Adjusted R-squared: -0.01009
## F-statistic: 0.7447 on 7 and 172 DF, p-value: 0.6344
```