library(readr)
# Load the fixed-broadband subscriptions table. readr guesses the column
# types and reports them in the message reproduced below.
Broadband <- read_csv("Broadband - fixed subscriptions.csv")
## Rows: 214 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a data viewer window, which only makes sense in an interactive
# session; skip it when the script is run or rendered non-interactively.
if (interactive()) {
  View(Broadband)
}
library(readr)
# Load the carbon dioxide emissions table. The measurement column is named
# "metric tonnes of CO2" in the file (see the column specification below).
Carbon <- read_csv("Carbon dioxide emissions.csv")
## Rows: 218 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): metric tonnes of CO2
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a data viewer window, which only makes sense in an interactive
# session; skip it when the script is run or rendered non-interactively.
if (interactive()) {
  View(Carbon)
}
library(readr)
# Load the external debt table. Both `value` and `date_of_information` are
# read as character here (see the column specification below) and are
# converted to numeric further down the script.
Debt <- read_csv("Debt - external.csv")
## Rows: 207 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): name, slug, value, date_of_information, region
## dbl (1): ranking
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a data viewer window, which only makes sense in an interactive
# session; skip it when the script is run or rendered non-interactively.
if (interactive()) {
  View(Debt)
}
library(readr)
# Load the installed electricity generating capacity table. The measurement
# column is named "kW" in the file (see the column specification below).
Electricity <- read_csv("Electricity - installed generating capacity.csv")
## Rows: 213 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): kW
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a data viewer window, which only makes sense in an interactive
# session; skip it when the script is run or rendered non-interactively.
if (interactive()) {
  View(Electricity)
}
library(readr)
# Load the per-capita energy consumption table. The measurement column is
# named "Btu/person" in the file (see the column specification below).
Energy <- read_csv("Energy consumption per capita.csv")
## Rows: 212 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): Btu/person
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a data viewer window, which only makes sense in an interactive
# session; skip it when the script is run or rendered non-interactively.
if (interactive()) {
  View(Energy)
}
library(readr)
# Load the consumer-price inflation table. The measurement column is named
# "%" and `date_of_information` is read as character (see the column
# specification below).
Inflation <- read_csv("Inflation rate (consumer prices).csv")
## Rows: 221 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): name, slug, date_of_information, region
## dbl (1): ranking
## num (1): %
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a data viewer window, which only makes sense in an interactive
# session; skip it when the script is run or rendered non-interactively.
if (interactive()) {
  View(Inflation)
}
library(readr)
# Load the public debt table. The measurement column is named "% of GDP" and
# `date_of_information` is read as character (see the column specification
# below); the dates are cleaned further down the script.
Public <- read_csv("Public debt.csv")
## Rows: 210 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): name, slug, date_of_information, region
## dbl (2): % of GDP, ranking
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a data viewer window, which only makes sense in an interactive
# session; skip it when the script is run or rendered non-interactively.
if (interactive()) {
  View(Public)
}
library(readr)
# Load the refined petroleum products production table. The measurement
# column is named "bbl/day" in the file (see the column specification below).
Refined <- read_csv("Refined petroleum products - production.csv")
## Rows: 216 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): bbl/day
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a data viewer window, which only makes sense in an interactive
# session; skip it when the script is run or rendered non-interactively.
if (interactive()) {
  View(Refined)
}
library(readr)
# Load the fixed telephone lines table. Its measurement column is named
# `value`, the same name used by several other tables in this script.
Telephones_fixed <- read_csv("Telephones - fixed lines.csv")
## Rows: 224 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a data viewer window, which only makes sense in an interactive
# session; skip it when the script is run or rendered non-interactively.
if (interactive()) {
  View(Telephones_fixed)
}
library(readr)
# Load the mobile cellular subscriptions table. Its measurement column is
# named `value`, the same name used by several other tables in this script.
Telephones_mobile <- read_csv("Telephones - mobile cellular.csv")
## Rows: 225 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a data viewer window, which only makes sense in an interactive
# session; skip it when the script is run or rendered non-interactively.
if (interactive()) {
  View(Telephones_mobile)
}
library(readr)
# Load the youth (ages 15-24) unemployment rate table. The measurement column
# is named "%" in the file (see the column specification below).
Youth <- read_csv("Youth unemployment rate (ages 15-24).csv")
## Rows: 203 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (3): %, date_of_information, ranking
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a data viewer window, which only makes sense in an interactive
# session; skip it when the script is run or rendered non-interactively.
if (interactive()) {
  View(Youth)
}
# Inspect the structure of Public before cleaning; the output below shows
# that date_of_information was read as character.
str(Public)
## spc_tbl_ [210 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ name : chr [1:210] "Greece" "Japan" "United Kingdom" "Singapore" ...
## $ slug : chr [1:210] "greece" "japan" "united-kingdom" "singapore" ...
## $ % of GDP : num [1:210] 237 216 185 154 147 ...
## $ date_of_information: chr [1:210] "2021" "2021" "2021" "2021" ...
## $ ranking : num [1:210] 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr [1:210] "Europe" "East and Southeast Asia" "Europe" "East and Southeast Asia" ...
## - attr(*, "spec")=
## .. cols(
## .. name = col_character(),
## .. slug = col_character(),
## .. `% of GDP` = col_double(),
## .. date_of_information = col_character(),
## .. ranking = col_double(),
## .. region = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Tidy the year column of Public in one pass: first drop any trailing
# whitespace, then remove the first "FY" marker found in each entry.
library(stringr)
Public$date_of_information <- str_replace(
  sub("\\s+$", "", Public$date_of_information),
  "FY",
  ""
)
# Re-inspect the table after the cleanup.
str(Public)
## spc_tbl_ [210 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ name : chr [1:210] "Greece" "Japan" "United Kingdom" "Singapore" ...
## $ slug : chr [1:210] "greece" "japan" "united-kingdom" "singapore" ...
## $ % of GDP : num [1:210] 237 216 185 154 147 ...
## $ date_of_information: chr [1:210] "2021" "2021" "2021" "2021" ...
## $ ranking : num [1:210] 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr [1:210] "Europe" "East and Southeast Asia" "Europe" "East and Southeast Asia" ...
## - attr(*, "spec")=
## .. cols(
## .. name = col_character(),
## .. slug = col_character(),
## .. `% of GDP` = col_double(),
## .. date_of_information = col_character(),
## .. ranking = col_double(),
## .. region = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
Cambiar «14/15» al año 2015 (pendiente: preguntar a Magallanes).
# Convert fiscal-year entries of the form "14/15" into a four-digit year.
# The affected rows are located by pattern rather than by a fixed row number,
# so the fix keeps working if the CSV gains, loses, or reorders rows, and it
# is a harmless no-op when no such entry exists.
# NOTE(review): the entry is mapped to the fiscal year's ending year and the
# two-digit year is assumed to be 20xx -- confirm this convention.
fila_a_modificar <- grep("^\\d{2}/\\d{2}$", Public$date_of_information)
# Characters 4-5 of "NN/NN" are the two digits of the ending year.
date_of_information <- substr(
  Public$date_of_information[fila_a_modificar],
  start = 4,
  stop = 5
)
date_of_information <- as.numeric(date_of_information) + 2000
Public$date_of_information[fila_a_modificar] <- paste0(date_of_information)
# With every entry now a plain year, store the column as numeric.
Public$date_of_information <- as.numeric(Public$date_of_information)
# Inspect the structure of Inflation; the output below shows that
# date_of_information was read as character.
str(Inflation)
## spc_tbl_ [221 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ name : chr [1:221] "South Sudan" "Andorra" "Dominica" "American Samoa" ...
## $ slug : chr [1:221] "south-sudan" "andorra" "dominica" "american-samoa" ...
## $ % : num [1:221] -6.69 -0.9 -0.73 -0.5 -0.4 -0.3 0 0 0.3 0.3 ...
## $ date_of_information: chr [1:221] "2022" "2015" "2020" "2015" ...
## $ ranking : num [1:221] 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr [1:221] "Africa" "Europe" "Central America and the Caribbean" "Australia and Oceania" ...
## - attr(*, "spec")=
## .. cols(
## .. name = col_character(),
## .. slug = col_character(),
## .. `%` = col_number(),
## .. date_of_information = col_character(),
## .. ranking = col_double(),
## .. region = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Remove the "January " prefix from the date column. The substitution is
# applied to the whole column instead of a fixed row number, so it does not
# depend on the row order of the CSV and also covers any other entry that
# carries the same prefix.
Inflation$date_of_information <- gsub(
  "January ",
  "",
  Inflation$date_of_information
)
# Inspect the structure of Youth; the output below shows that every column
# except name, slug and region is already numeric.
str(Youth)
## spc_tbl_ [203 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ name : chr [1:203] "Djibouti" "South Africa" "Eswatini" "Libya" ...
## $ slug : chr [1:203] "djibouti" "south-africa" "eswatini" "libya" ...
## $ % : num [1:203] 79.9 64.2 50.9 50.5 48.8 45.4 42.3 42.2 41.2 41.1 ...
## $ date_of_information: num [1:203] 2021 2021 2021 2021 2020 ...
## $ ranking : num [1:203] 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr [1:203] "Africa" "Africa" "Africa" "Africa" ...
## - attr(*, "spec")=
## .. cols(
## .. name = col_character(),
## .. slug = col_character(),
## .. `%` = col_double(),
## .. date_of_information = col_double(),
## .. ranking = col_double(),
## .. region = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Inspect the structure of Debt; the output below shows that value (with "$"
# and thousands separators) and date_of_information are character columns.
str(Debt)
## spc_tbl_ [207 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ name : chr [1:207] "United States" "United Kingdom" "France" "Germany" ...
## $ slug : chr [1:207] "united-states" "united-kingdom" "france" "germany" ...
## $ value : chr [1:207] "$20,275,951,000,000" "$8,722,000,000,000" "$6,356,000,000,000" "$5,671,463,000,000" ...
## $ date_of_information: chr [1:207] "2019" "2019" "2019" "2019" ...
## $ ranking : num [1:207] 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr [1:207] "North America" "Europe" "Europe" "Europe" ...
## - attr(*, "spec")=
## .. cols(
## .. name = col_character(),
## .. slug = col_character(),
## .. value = col_character(),
## .. date_of_information = col_character(),
## .. ranking = col_double(),
## .. region = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Collapse everything up to and including the last run of four digits into
# just those four digits; entries without a four-digit run are left untouched.
Debt[["date_of_information"]] <- sub(
  ".*([0-9]{4})",
  "\\1",
  Debt[["date_of_information"]]
)
Convertir “FY10/11” a solo “2011” (pendiente: preguntar al profesor).
# Convert fiscal-year entries such as "FY10/11" into a four-digit year.
# The affected rows are located by pattern rather than by a fixed row number,
# so the fix keeps working if the CSV gains, loses, or reorders rows, and it
# is a harmless no-op when no such entry exists.
# NOTE(review): the entry is mapped to the fiscal year's ending year and the
# two-digit year is assumed to be 20xx -- confirm this convention.
fila_a_modificar <- grep("FY(\\d{2})/(\\d{2})", Debt$date_of_information)
nuevo_año <- sub(
  "FY(\\d{2})/(\\d{2})",
  "20\\2",
  Debt$date_of_information[fila_a_modificar]
)
Debt$date_of_information[fila_a_modificar] <- nuevo_año
# With every entry now a plain year, store the column as numeric.
Debt$date_of_information <- as.numeric(Debt$date_of_information)
# Drop the "$" sign and the thousands separators before converting the
# monetary amounts to numeric.
Debt$value <- as.numeric(gsub("[\\$,]", "", Debt$value))
Renombrar variables:
# Give each table's measurement column a short, syntactic name. A column is
# renamed only when the old name is present; otherwise the names are left
# unchanged.
names(Carbon) <- replace(
  names(Carbon), names(Carbon) == "metric tonnes of CO2", "co2"
)
names(Energy) <- replace(
  names(Energy), names(Energy) == "Btu/person", "BTU"
)
names(Inflation) <- replace(
  names(Inflation), names(Inflation) == "%", "porcentaje"
)
names(Public) <- replace(
  names(Public), names(Public) == "% of GDP", "GDP"
)
names(Refined) <- replace(
  names(Refined), names(Refined) == "bbl/day", "bbl"
)
names(Youth) <- replace(
  names(Youth), names(Youth) == "%", "porcentaje2"
)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Reduce every table to the country key (`name`) plus its single measurement
# column. Broadband, Debt, Telephones_fixed and Telephones_mobile all keep a
# column called `value`; the merge step later tells them apart with suffixes.
Broadband         <- dplyr::select(Broadband, name, value)
Carbon            <- dplyr::select(Carbon, name, co2)
Debt              <- dplyr::select(Debt, name, value)
Electricity       <- dplyr::select(Electricity, name, kW)
Energy            <- dplyr::select(Energy, name, BTU)
Inflation         <- dplyr::select(Inflation, name, porcentaje)
Public            <- dplyr::select(Public, name, GDP)
Refined           <- dplyr::select(Refined, name, bbl)
Telephones_fixed  <- dplyr::select(Telephones_fixed, name, value)
Telephones_mobile <- dplyr::select(Telephones_mobile, name, value)
Youth             <- dplyr::select(Youth, name, porcentaje2)
# Keep only the rows of a table that have no missing value in any column.
drop_incomplete <- function(df) {
  df[complete.cases(df), ]
}

# Apply the same complete-case filter to every table.
Broadband         <- drop_incomplete(Broadband)
Carbon            <- drop_incomplete(Carbon)
Debt              <- drop_incomplete(Debt)
Electricity       <- drop_incomplete(Electricity)
Energy            <- drop_incomplete(Energy)
Inflation         <- drop_incomplete(Inflation)
Public            <- drop_incomplete(Public)
Refined           <- drop_incomplete(Refined)
Telephones_fixed  <- drop_incomplete(Telephones_fixed)
Telephones_mobile <- drop_incomplete(Telephones_mobile)
Youth             <- drop_incomplete(Youth)
# Combine all tables into one wide table keyed by country name. Every step is
# a full outer join (all = TRUE), so a country that is missing from one table
# is kept with NA in that table's column instead of being dropped.
#
# Broadband and Debt both contribute a column called `value`; merge() renames
# them to value.x and value.y with its default suffixes.
merged_df  <- merge(Broadband, Carbon, by = "name", all = TRUE)
merged_df1 <- merge(merged_df, Debt, by = "name", all = TRUE)
merged_df2 <- merge(merged_df1, Electricity, by = "name", all = TRUE)
merged_df3 <- merge(merged_df2, Energy, by = "name", all = TRUE)
merged_df4 <- merge(merged_df3, Inflation, by = "name", all = TRUE)
merged_df5 <- merge(merged_df4, Public, by = "name", all = TRUE)
merged_df6 <- merge(merged_df5, Refined, by = "name", all = TRUE)
merged_df7 <- merge(merged_df6, Telephones_fixed, by = "name", all = TRUE)
# Telephones_fixed and Telephones_mobile also both carry `value`. Custom
# suffixes keep the result distinct from the existing value.x / value.y:
# value.data1 is fixed lines, value.data2 is mobile subscriptions.
# all = TRUE is required here as well; without it this single step becomes an
# inner join and silently discards countries lacking mobile data.
merged_df8 <- merge(
  merged_df7, Telephones_mobile,
  by = "name", all = TRUE,
  suffixes = c(".data1", ".data2")
)
merged_df9 <- merge(merged_df8, Youth, by = "name", all = TRUE)
# List the columns of the merged table to check how merge() disambiguated the
# repeated `value` columns.
names(merged_df9)
## [1] "name" "value.x" "co2" "value.y" "kW"
## [6] "BTU" "porcentaje" "GDP" "bbl" "value.data1"
## [11] "value.data2" "porcentaje2"
# Build the analysis table: every column of merged_df9 except the country key.
# Note that the character vector `select` shares its name with
# dplyr::select(); calls of the form select(...) still resolve to the function.
dontselect <- c("name")
select <- setdiff(names(merged_df9), dontselect)
theData <- merged_df9[, select]
library(polycor)
## Warning: package 'polycor' was built under R version 4.3.3
# Estimate the correlation matrix of the analysis variables with
# polycor::hetcor() and keep only its `correlations` component.
corMatrix <- polycor::hetcor(theData)[["correlations"]]
# Show the matrix rounded to two decimals.
round(corMatrix, digits = 2)
## value.x co2 value.y kW BTU porcentaje GDP bbl
## value.x 1.00 0.96 0.32 0.96 0.07 -0.01 0.06 0.67
## co2 0.96 1.00 0.44 0.99 0.13 -0.01 0.05 0.82
## value.y 0.32 0.44 1.00 0.49 0.28 -0.02 0.30 0.75
## kW 0.96 0.99 0.49 1.00 0.12 0.00 0.09 0.83
## BTU 0.07 0.13 0.28 0.12 1.00 0.00 0.08 0.24
## porcentaje -0.01 -0.01 -0.02 0.00 0.00 1.00 -0.04 0.02
## GDP 0.06 0.05 0.30 0.09 0.08 -0.04 1.00 0.14
## bbl 0.67 0.82 0.75 0.83 0.24 0.02 0.14 1.00
## value.data1 0.93 0.94 0.55 0.96 0.13 -0.01 0.19 0.81
## value.data2 0.85 0.89 0.21 0.87 0.00 -0.01 0.01 0.63
## porcentaje2 -0.07 -0.07 -0.11 -0.07 -0.14 -0.03 0.10 -0.08
## value.data1 value.data2 porcentaje2
## value.x 0.93 0.85 -0.07
## co2 0.94 0.89 -0.07
## value.y 0.55 0.21 -0.11
## kW 0.96 0.87 -0.07
## BTU 0.13 0.00 -0.14
## porcentaje -0.01 -0.01 -0.03
## GDP 0.19 0.01 0.10
## bbl 0.81 0.63 -0.08
## value.data1 1.00 0.82 -0.09
## value.data2 0.82 1.00 -0.05
## porcentaje2 -0.09 -0.05 1.00
library(psych)
## Warning: package 'psych' was built under R version 4.3.2
##
## Attaching package: 'psych'
## The following object is masked from 'package:polycor':
##
## polyserial
# Kaiser-Meyer-Olkin factor adequacy (overall and per variable), computed from
# the correlation matrix.
psych::KMO(corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA = 0.76
## MSA for each item =
## value.x co2 value.y kW BTU porcentaje
## 0.69 0.79 0.80 0.83 0.58 0.11
## GDP bbl value.data1 value.data2 porcentaje2
## 0.63 0.69 0.87 0.73 0.51
# Bartlett's test on the correlation matrix; TRUE would mean its p-value is
# above 0.05.
# The sample size passed to the test is the number of complete rows, not
# nrow(theData): theData contains NAs introduced by the outer joins, and
# hetcor() was called with its default handling of missing data, which
# (per the polycor documentation) uses complete observations only.
cortest.bartlett(
  corMatrix,
  n = sum(complete.cases(theData))
)$p.value > 0.05
## [1] FALSE
library(matrixcalc)
# Check whether the correlation matrix is singular (FALSE in the output below).
is.singular.matrix(corMatrix)
## [1] FALSE
# Parallel analysis to suggest the number of factors, without drawing the
# scree plot. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
# NOTE(review): psych documents `correct` as a numeric continuity correction
# (default 0.5); the logical value is kept as originally written -- confirm.
fa.parallel(theData, fa = "fa", correct = TRUE, plot = FALSE)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
#library(GPArotation)
# Exploratory factor analysis: three factors, minimum-residual extraction,
# orthogonal (varimax) rotation, correlations found with cor = "mixed".
# NOTE(review): the parallel analysis output above suggests 2 factors, while
# 3 are requested here -- confirm the choice. An oblique rotation (oblimin)
# was being considered as an alternative.
resfa <- fa(
  r = theData,
  nfactors = 3,
  rotate = "varimax",
  fm = "minres",
  cor = "mixed"
)
## Warning in cor.smooth(model): Matrix was not positive definite, smoothing was
## done
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27128e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27122e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27067e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27039e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27039e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27128e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27074e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27048e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27036e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27033e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
# Print the rotated factor loadings with the default display cutoff.
print(resfa$loadings)
##
## Loadings:
## MR1 MR2 MR3
## value.x 0.963
## co2 0.979 0.202
## value.y 0.257 0.946
## kW 0.969 0.253
## BTU 0.308
## porcentaje
## GDP 0.284 0.113
## bbl 0.692 0.597
## value.data1 0.907 0.339
## value.data2 0.903
## porcentaje2 -0.130 0.988
##
## MR1 MR2 MR3
## SS loadings 5.015 1.669 1.001
## Proportion Var 0.456 0.152 0.091
## Cumulative Var 0.456 0.608 0.699
# Print the loadings again, hiding every loading below 0.5 in absolute value
# so that the dominant factor of each variable stands out.
print(resfa$loadings,cutoff = 0.5)
##
## Loadings:
## MR1 MR2 MR3
## value.x 0.963
## co2 0.979
## value.y 0.946
## kW 0.969
## BTU
## porcentaje
## GDP
## bbl 0.692 0.597
## value.data1 0.907
## value.data2 0.903
## porcentaje2 0.988
##
## MR1 MR2 MR3
## SS loadings 5.015 1.669 1.001
## Proportion Var 0.456 0.152 0.091
## Cumulative Var 0.456 0.608 0.699
# Draw the path diagram of the fitted factor model.
fa.diagram(resfa, main = "Resultados del EFA")
# List the communalities from lowest to highest.
sort(resfa[["communality"]])
## porcentaje GDP BTU value.data2 bbl value.x
## 0.000991509 0.094628887 0.107103202 0.816458070 0.835707988 0.932449076
## value.data1 value.y porcentaje2 co2 kW
## 0.937750818 0.962741401 0.995312984 0.999011831 1.003077347
# List the per-variable complexity values from lowest to highest.
sort(resfa$complexity)
## value.data2 value.x porcentaje2 porcentaje co2 kW
## 1.000474 1.012238 1.039826 1.075632 1.084642 1.135500
## value.y BTU value.data1 GDP bbl
## 1.149599 1.254178 1.273787 1.337513 1.961335
library(GPArotation)
## Warning: package 'GPArotation' was built under R version 4.3.3
##
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
# Same three-factor minres model as `resfa`, but with an oblique (oblimin)
# rotation instead of varimax.
resfa1 <- fa(
  r = theData,
  nfactors = 3,
  rotate = "oblimin",
  fm = "minres",
  cor = "mixed"
)
## Warning in cor.smooth(model): Matrix was not positive definite, smoothing was
## done
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27128e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27122e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27067e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27039e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27039e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27128e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27074e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27049e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27036e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27033e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
# Factor loadings of the fitted model, using print()'s default cutoff.
print(resfa1[["loadings"]])
##
## Loadings:
## MR1 MR2 MR3
## value.x 1.004 -0.102
## co2 0.983
## value.y 0.982
## kW 0.958
## BTU 0.312 -0.112
## porcentaje
## GDP 0.317
## bbl 0.561 0.521
## value.data1 0.866 0.199
## value.data2 0.966 -0.182
## porcentaje2 0.997
##
## MR1 MR2 MR3
## SS loadings 4.895 1.528 1.019
## Proportion Var 0.445 0.139 0.093
## Cumulative Var 0.445 0.584 0.676
# Same loadings table, blanking every loading below 0.5 in absolute value
# so that only the dominant variable-factor associations remain visible.
print(resfa1[["loadings"]], cutoff = 0.5)
##
## Loadings:
## MR1 MR2 MR3
## value.x 1.004
## co2 0.983
## value.y 0.982
## kW 0.958
## BTU
## porcentaje
## GDP
## bbl 0.561 0.521
## value.data1 0.866
## value.data2 0.966
## porcentaje2 0.997
##
## MR1 MR2 MR3
## SS loadings 4.895 1.528 1.019
## Proportion Var 0.445 0.139 0.093
## Cumulative Var 0.445 0.584 0.676
# Path diagram of the factor solution. It is drawn from resfa1, the same
# object whose loadings and communalities are reported around it, so that the
# plot and the printed tables describe one and the same model.
# NOTE(review): the original call used `resfa`; confirm that was not a
# deliberate reference to a different fit.
fa.diagram(resfa1, main = "Resultados del EFA")
# Communalities in ascending order; the variables listed first are the ones
# the factors explain least.
sort(resfa1$communality)
## porcentaje GDP BTU value.data2 bbl value.x
## 0.000991509 0.094628887 0.107103202 0.816458070 0.835707988 0.932449076
## value.data1 value.y porcentaje2 co2 kW
## 0.937750818 0.962741401 0.995312984 0.999011831 1.003077347
# Pregunta 2 ----
# Drop every row that has a missing value in any column.
merged_df9 <- merged_df9[complete.cases(merged_df9), ]
library(dplyr)
# Keep the row label plus the three indicators used further down.
merged_df9 <- merged_df9 %>%
  select(name, value.y, kW, BTU)
# install.packages("BBmisc")
library(BBmisc)
## Warning: package 'BBmisc' was built under R version 4.3.3
##
## Attaching package: 'BBmisc'
## The following objects are masked from 'package:dplyr':
##
## coalesce, collapse, symdiff
## The following object is masked from 'package:base':
##
## isFALSE
# Overwrite columns 2 to 4 (the numeric indicators) with their standardized
# version as returned by normalize(method = "standardize").
merged_df9[, 2:4] <- normalize(merged_df9[, 2:4], method = "standardize")
# Clustering input: the standardized columns only, with each row labelled by
# the `name` column.
dataClus <- merged_df9[, 2:4]
row.names(dataClus) <- merged_df9$name
library(cluster)
# Pairwise dissimilarity object computed with the Gower metric.
g.dist <- daisy(dataClus, metric = "gower")
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.3.3
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Seed the RNG BEFORE the gap statistic: it is the only step in this section
# that draws random reference samples, so without a seed the suggested number
# of clusters can change from one run to the next.
set.seed(123)
fviz_nbclust(
  dataClus, hcut,
  diss = g.dist, method = "gap_stat", k.max = 10,
  verbose = FALSE, hc_func = "agnes"
)
# Re-seed at the original position so the RNG state seen by any later code is
# exactly what it was before this fix.
set.seed(123)
# Agglomerative hierarchical clustering on the Gower dissimilarities, cut
# into 5 groups.
res.agnes <- hcut(g.dist, k = 5, hc_func = "agnes", hc_method = "ward.D")
# Store each row's cluster label next to its data.
dataClus$agnes <- res.agnes$cluster
# Visualize
fviz_dend(res.agnes, cex = 0.7, horiz = TRUE, main = "")
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Silhouette plot for the AGNES solution (summary table suppressed).
fviz_silhouette(res.agnes, print.summary = FALSE)
# Per-row silhouette information, with the row label copied into a column.
silAGNES <- data.frame(res.agnes$silinfo$widths)
silAGNES$country <- row.names(silAGNES)
# Rows with a negative silhouette width, listed alphabetically.
poorAGNES <- sort(silAGNES[silAGNES$sil_width < 0, "country"])
poorAGNES
## [1] "Bermuda" "Czechia" "Italy" "Seychelles" "Turkmenistan"
# Mean of every remaining column within each AGNES cluster.
aggregate(. ~ agnes, data = dataClus, FUN = mean)
## agnes value.y kW BTU
## 1 1 -0.23245756 -0.14616331 -0.3888734
## 2 2 0.76604965 0.14700723 1.0002616
## 3 3 0.05465014 -0.04406216 3.6555764
## 4 4 0.82009151 11.35044751 0.1487211
## 5 5 10.58939909 5.74313859 1.8638180