slckd

# Read the fixed-broadband subscriptions CSV into `Broadband`. The path is
# relative to the working directory; readr guesses the column types and
# reports them in the message echoed below.
library(readr)
Broadband <- read_csv("Broadband - fixed subscriptions.csv")

## Rows: 214 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens a data-viewer window, so it is only useful (and only safe) in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(Broadband)
}

# Read the carbon-dioxide emissions CSV into `Carbon`. The path is relative to
# the working directory; the measure arrives in a column literally named
# "metric tonnes of CO2" (see the column specification echoed below).
library(readr)
Carbon <- read_csv("Carbon dioxide emissions.csv")

## Rows: 218 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): metric tonnes of CO2
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens a data-viewer window, so it is only useful (and only safe) in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(Carbon)
}

# Read the external-debt CSV into `Debt`. The path is relative to the working
# directory. Note from the column specification echoed below that both `value`
# and `date_of_information` are parsed as character, so they need cleaning
# before they can be used as numbers.
library(readr)
Debt <- read_csv("Debt - external.csv")

## Rows: 207 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): name, slug, value, date_of_information, region
## dbl (1): ranking
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens a data-viewer window, so it is only useful (and only safe) in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(Debt)
}

# Read the installed-generating-capacity CSV into `Electricity`. The path is
# relative to the working directory; the measure arrives in a column named
# "kW" (see the column specification echoed below).
library(readr)
Electricity <- read_csv("Electricity - installed generating capacity.csv")

## Rows: 213 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): kW
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens a data-viewer window, so it is only useful (and only safe) in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(Electricity)
}

# Read the per-capita energy consumption CSV into `Energy`. The path is
# relative to the working directory; the measure arrives in a column named
# "Btu/person" (see the column specification echoed below).
library(readr)
Energy <- read_csv("Energy consumption per capita.csv")

## Rows: 212 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): Btu/person
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens a data-viewer window, so it is only useful (and only safe) in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(Energy)
}

# Read the consumer-price inflation CSV into `Inflation`. The path is relative
# to the working directory. The measure arrives in a column named "%", and
# `date_of_information` is parsed as character (see the column specification
# echoed below).
library(readr)
Inflation <- read_csv("Inflation rate (consumer prices).csv")

## Rows: 221 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): name, slug, date_of_information, region
## dbl (1): ranking
## num (1): %
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens a data-viewer window, so it is only useful (and only safe) in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(Inflation)
}

# Read the public-debt CSV into `Public`. The path is relative to the working
# directory. The measure arrives in a column named "% of GDP", and
# `date_of_information` is parsed as character (see the column specification
# echoed below).
library(readr)
Public <- read_csv("Public debt.csv")

## Rows: 210 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): name, slug, date_of_information, region
## dbl (2): % of GDP, ranking
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens a data-viewer window, so it is only useful (and only safe) in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(Public)
}

# Read the refined-petroleum production CSV into `Refined`. The path is
# relative to the working directory; the measure arrives in a column named
# "bbl/day" (see the column specification echoed below).
library(readr)
Refined <- read_csv("Refined petroleum products - production.csv")

## Rows: 216 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): bbl/day
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens a data-viewer window, so it is only useful (and only safe) in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(Refined)
}

# Read the fixed-line telephones CSV into `Telephones_fixed`. The path is
# relative to the working directory; the measure arrives in a column named
# "value" (see the column specification echoed below).
library(readr)
Telephones_fixed <- read_csv("Telephones - fixed lines.csv")

## Rows: 224 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens a data-viewer window, so it is only useful (and only safe) in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(Telephones_fixed)
}

# Read the mobile-cellular telephones CSV into `Telephones_mobile`. The path is
# relative to the working directory; the measure arrives in a column named
# "value" (see the column specification echoed below).
library(readr)
Telephones_mobile <- read_csv("Telephones - mobile cellular.csv")

## Rows: 225 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens a data-viewer window, so it is only useful (and only safe) in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(Telephones_mobile)
}

# Read the youth-unemployment CSV into `Youth`. The path is relative to the
# working directory; the measure arrives in a column named "%" and every
# non-text column is already numeric (see the column specification echoed
# below).
library(readr)
Youth <- read_csv("Youth unemployment rate (ages 15-24).csv")

## Rows: 203 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (3): %, date_of_information, ranking
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens a data-viewer window, so it is only useful (and only safe) in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(Youth)
}

# Show the structure of Public (column names and types) before its
# date_of_information column is cleaned.
str(Public)

## spc_tbl_ [210 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ name               : chr [1:210] "Greece" "Japan" "United Kingdom" "Singapore" ...
##  $ slug               : chr [1:210] "greece" "japan" "united-kingdom" "singapore" ...
##  $ % of GDP           : num [1:210] 237 216 185 154 147 ...
##  $ date_of_information: chr [1:210] "2021" "2021" "2021" "2021" ...
##  $ ranking            : num [1:210] 1 2 3 4 5 6 7 8 9 10 ...
##  $ region             : chr [1:210] "Europe" "East and Southeast Asia" "Europe" "East and Southeast Asia" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   name = col_character(),
##   ..   slug = col_character(),
##   ..   `% of GDP` = col_double(),
##   ..   date_of_information = col_character(),
##   ..   ranking = col_double(),
##   ..   region = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

# stringr provides str_replace(), used below.
library(stringr)

# Clean Public's date strings in one pass: the inner gsub() strips trailing
# whitespace, then str_replace() removes the first "FY" found in each value.
Public[["date_of_information"]] <- str_replace(
  string = gsub(
    pattern = "\\s+$",
    replacement = "",
    x = Public[["date_of_information"]]
  ),
  pattern = "FY",
  replacement = ""
)

# Re-inspect the table after the clean-up.
str(Public)

## spc_tbl_ [210 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ name               : chr [1:210] "Greece" "Japan" "United Kingdom" "Singapore" ...
##  $ slug               : chr [1:210] "greece" "japan" "united-kingdom" "singapore" ...
##  $ % of GDP           : num [1:210] 237 216 185 154 147 ...
##  $ date_of_information: chr [1:210] "2021" "2021" "2021" "2021" ...
##  $ ranking            : num [1:210] 1 2 3 4 5 6 7 8 9 10 ...
##  $ region             : chr [1:210] "Europe" "East and Southeast Asia" "Europe" "East and Southeast Asia" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   name = col_character(),
##   ..   slug = col_character(),
##   ..   `% of GDP` = col_double(),
##   ..   date_of_information = col_character(),
##   ..   ranking = col_double(),
##   ..   region = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

Cambiar “14/15” al año 2015 (pendiente: preguntar a Magallanes)

# Some dates are fiscal-year ranges such as "14/15" (presumably what remains of
# "FY14/15" once the "FY" prefix has been stripped). Keep the closing year and
# expand it to four digits: "14/15" -> "2015".
# Rows are located by pattern instead of by a hard-coded row number, so the fix
# still hits the right rows if the CSV is re-exported in a different order.
# NOTE(review): assumes every two-digit year belongs to the 2000s -- confirm.
fiscal_rows <- grepl("^\\d{2}/\\d{2}", Public$date_of_information)
Public$date_of_information[fiscal_rows] <- paste0(
  "20",
  substr(Public$date_of_information[fiscal_rows], start = 4, stop = 5)
)

# Convert the cleaned strings to numbers; anything that is still not a plain
# number becomes NA with a coercion warning.
Public$date_of_information <- as.numeric(Public$date_of_information)

# Show the structure of Inflation before cleaning its date column.
str(Inflation)

## spc_tbl_ [221 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ name               : chr [1:221] "South Sudan" "Andorra" "Dominica" "American Samoa" ...
##  $ slug               : chr [1:221] "south-sudan" "andorra" "dominica" "american-samoa" ...
##  $ %                  : num [1:221] -6.69 -0.9 -0.73 -0.5 -0.4 -0.3 0 0 0.3 0.3 ...
##  $ date_of_information: chr [1:221] "2022" "2015" "2020" "2015" ...
##  $ ranking            : num [1:221] 1 2 3 4 5 6 7 8 9 10 ...
##  $ region             : chr [1:221] "Africa" "Europe" "Central America and the Caribbean" "Australia and Oceania" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   name = col_character(),
##   ..   slug = col_character(),
##   ..   `%` = col_number(),
##   ..   date_of_information = col_character(),
##   ..   ranking = col_double(),
##   ..   region = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

# Remove the literal text "January " from the date strings so only the rest of
# the value (the year) is left. The replacement is applied to the whole column
# rather than to hard-coded row 9, so it keeps working if the row order of the
# CSV changes; values without "January " are returned untouched.
Inflation$date_of_information <- gsub(
  "January ", "", Inflation$date_of_information,
  fixed = TRUE
)

# Show the structure of Youth (column names and types).
str(Youth)

## spc_tbl_ [203 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ name               : chr [1:203] "Djibouti" "South Africa" "Eswatini" "Libya" ...
##  $ slug               : chr [1:203] "djibouti" "south-africa" "eswatini" "libya" ...
##  $ %                  : num [1:203] 79.9 64.2 50.9 50.5 48.8 45.4 42.3 42.2 41.2 41.1 ...
##  $ date_of_information: num [1:203] 2021 2021 2021 2021 2020 ...
##  $ ranking            : num [1:203] 1 2 3 4 5 6 7 8 9 10 ...
##  $ region             : chr [1:203] "Africa" "Africa" "Africa" "Africa" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   name = col_character(),
##   ..   slug = col_character(),
##   ..   `%` = col_double(),
##   ..   date_of_information = col_double(),
##   ..   ranking = col_double(),
##   ..   region = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

# Show the structure of Debt (column names and types); value and
# date_of_information are both character at this point.
str(Debt)

## spc_tbl_ [207 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ name               : chr [1:207] "United States" "United Kingdom" "France" "Germany" ...
##  $ slug               : chr [1:207] "united-states" "united-kingdom" "france" "germany" ...
##  $ value              : chr [1:207] "$20,275,951,000,000" "$8,722,000,000,000" "$6,356,000,000,000" "$5,671,463,000,000" ...
##  $ date_of_information: chr [1:207] "2019" "2019" "2019" "2019" ...
##  $ ranking            : num [1:207] 1 2 3 4 5 6 7 8 9 10 ...
##  $ region             : chr [1:207] "North America" "Europe" "Europe" "Europe" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   name = col_character(),
##   ..   slug = col_character(),
##   ..   value = col_character(),
##   ..   date_of_information = col_character(),
##   ..   ranking = col_double(),
##   ..   region = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

# Wherever a date string contains four consecutive digits, drop everything in
# front of the last such run so the value starts at the year. Strings without a
# four-digit run are left unchanged.
Debt[["date_of_information"]] <- gsub(
  pattern = ".*([0-9]{4})",
  replacement = "\\1",
  x = Debt[["date_of_information"]]
)

Cambiar “FY10/11” a solo “2011” (pendiente: preguntar al profesor)

# Fiscal-year ranges written as "FYxx/yy" are reduced to their closing year and
# expanded to four digits: "FY10/11" -> "2011".
# Rows are located by pattern instead of by a hard-coded row number: with a
# fixed index, a change in row order would prepend "20" to an unrelated value.
# Only ASCII identifiers are used so the script parses under any locale.
# NOTE(review): assumes every two-digit year belongs to the 2000s -- confirm.
fiscal_rows <- grepl("FY\\d{2}/\\d{2}", Debt$date_of_information)
Debt$date_of_information[fiscal_rows] <- paste0(
  "20",
  gsub("FY(\\d{2})/(\\d{2})", "\\2", Debt$date_of_information[fiscal_rows])
)

# Convert the cleaned year strings to numbers; anything that is still not a
# plain number becomes NA with a coercion warning.
Debt$date_of_information <- as.numeric(Debt$date_of_information)

# Strip the dollar sign and thousands separators ("$20,275,951,000,000") before
# converting the amount to a number.
Debt$value <- as.numeric(gsub("[\\$,]", "", Debt$value))

Renombrar variables:

# Give the measure column of each table a short, syntactic name. replace()
# swaps only the matching name and leaves the others (and their order) alone;
# if the old name is absent nothing changes.
names(Carbon) <- replace(
  names(Carbon), names(Carbon) == "metric tonnes of CO2", "co2"
)
names(Energy) <- replace(
  names(Energy), names(Energy) == "Btu/person", "BTU"
)
names(Inflation) <- replace(
  names(Inflation), names(Inflation) == "%", "porcentaje"
)
names(Public) <- replace(
  names(Public), names(Public) == "% of GDP", "GDP"
)
names(Refined) <- replace(
  names(Refined), names(Refined) == "bbl/day", "bbl"
)
names(Youth) <- replace(
  names(Youth), names(Youth) == "%", "porcentaje2"
)

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# For every table keep only the country name and its measure column, then drop
# the rows in which either of the two is missing.
Broadband <- Broadband |>
  select(name, value) |>
  filter(complete.cases(name, value))

Carbon <- Carbon |>
  select(name, co2) |>
  filter(complete.cases(name, co2))

Debt <- Debt |>
  select(name, value) |>
  filter(complete.cases(name, value))

Electricity <- Electricity |>
  select(name, kW) |>
  filter(complete.cases(name, kW))

Energy <- Energy |>
  select(name, BTU) |>
  filter(complete.cases(name, BTU))

Inflation <- Inflation |>
  select(name, porcentaje) |>
  filter(complete.cases(name, porcentaje))

Public <- Public |>
  select(name, GDP) |>
  filter(complete.cases(name, GDP))

Refined <- Refined |>
  select(name, bbl) |>
  filter(complete.cases(name, bbl))

Telephones_fixed <- Telephones_fixed |>
  select(name, value) |>
  filter(complete.cases(name, value))

Telephones_mobile <- Telephones_mobile |>
  select(name, value) |>
  filter(complete.cases(name, value))

Youth <- Youth |>
  select(name, porcentaje2) |>
  filter(complete.cases(name, porcentaje2))

# Combine the eleven tables into one wide table keyed by country name.
# Every step is a full outer join (all = TRUE): a country missing from one
# table is kept and gets NA for that table's measure.
#
# Four tables carry a measure column called `value`; merge() disambiguates them
# through its suffixes, which yields these names in the final table:
#   Broadband         -> value.x
#   Debt              -> value.y
#   Telephones_fixed  -> value.data1
#   Telephones_mobile -> value.data2
merged_df  <- merge(Broadband, Carbon, by = "name", all = TRUE)
merged_df1 <- merge(merged_df, Debt, by = "name", all = TRUE)
merged_df2 <- merge(merged_df1, Electricity, by = "name", all = TRUE)
merged_df3 <- merge(merged_df2, Energy, by = "name", all = TRUE)
merged_df4 <- merge(merged_df3, Inflation, by = "name", all = TRUE)
merged_df5 <- merge(merged_df4, Public, by = "name", all = TRUE)
merged_df6 <- merge(merged_df5, Refined, by = "name", all = TRUE)
merged_df7 <- merge(merged_df6, Telephones_fixed, by = "name", all = TRUE)

# This step previously omitted all = TRUE, which made it the only inner join in
# the chain and silently dropped every country absent from either side.
merged_df8 <- merge(
  merged_df7, Telephones_mobile,
  by = "name",
  all = TRUE,
  suffixes = c(".data1", ".data2")
)

merged_df9 <- merge(merged_df8, Youth, by = "name", all = TRUE)

# List the resulting column names.
names(merged_df9)

##  [1] "name"        "value.x"     "co2"         "value.y"     "kW"         
##  [6] "BTU"         "porcentaje"  "GDP"         "bbl"         "value.data1"
## [11] "value.data2" "porcentaje2"

# Build the analysis table: every column of the merged data except the
# country identifier.
dontselect <- c("name")
select <- setdiff(names(merged_df9), dontselect)
theData <- merged_df9[, select]

library(polycor)

## Warning: package 'polycor' was built under R version 4.3.3

# Correlation matrix of the analysis variables, taken from the object returned
# by polycor::hetcor(); printed rounded to two decimals.
corMatrix <- polycor::hetcor(theData)[["correlations"]]
round(corMatrix, digits = 2)

##             value.x   co2 value.y    kW   BTU porcentaje   GDP   bbl
## value.x        1.00  0.96    0.32  0.96  0.07      -0.01  0.06  0.67
## co2            0.96  1.00    0.44  0.99  0.13      -0.01  0.05  0.82
## value.y        0.32  0.44    1.00  0.49  0.28      -0.02  0.30  0.75
## kW             0.96  0.99    0.49  1.00  0.12       0.00  0.09  0.83
## BTU            0.07  0.13    0.28  0.12  1.00       0.00  0.08  0.24
## porcentaje    -0.01 -0.01   -0.02  0.00  0.00       1.00 -0.04  0.02
## GDP            0.06  0.05    0.30  0.09  0.08      -0.04  1.00  0.14
## bbl            0.67  0.82    0.75  0.83  0.24       0.02  0.14  1.00
## value.data1    0.93  0.94    0.55  0.96  0.13      -0.01  0.19  0.81
## value.data2    0.85  0.89    0.21  0.87  0.00      -0.01  0.01  0.63
## porcentaje2   -0.07 -0.07   -0.11 -0.07 -0.14      -0.03  0.10 -0.08
##             value.data1 value.data2 porcentaje2
## value.x            0.93        0.85       -0.07
## co2                0.94        0.89       -0.07
## value.y            0.55        0.21       -0.11
## kW                 0.96        0.87       -0.07
## BTU                0.13        0.00       -0.14
## porcentaje        -0.01       -0.01       -0.03
## GDP                0.19        0.01        0.10
## bbl                0.81        0.63       -0.08
## value.data1        1.00        0.82       -0.09
## value.data2        0.82        1.00       -0.05
## porcentaje2       -0.09       -0.05        1.00

library(psych)

## Warning: package 'psych' was built under R version 4.3.2

## 
## Attaching package: 'psych'

## The following object is masked from 'package:polycor':
## 
##     polyserial

# Kaiser-Meyer-Olkin factor adequacy of the correlation matrix: prints the
# overall MSA and the MSA of each variable.
psych::KMO(corMatrix)

## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA =  0.76
## MSA for each item = 
##     value.x         co2     value.y          kW         BTU  porcentaje 
##        0.69        0.79        0.80        0.83        0.58        0.11 
##         GDP         bbl value.data1 value.data2 porcentaje2 
##        0.63        0.69        0.87        0.73        0.51

# Bartlett's test on the correlation matrix; prints TRUE when the p-value is
# above 0.05.
# The sample size passed to the test must be the number of observations the
# correlations were actually computed from. hetcor() was called without a `use`
# argument, and its default keeps complete observations only, so rows of
# theData that contain an NA did not contribute; counting them with
# nrow(theData) would overstate n and inflate the test statistic.
cortest.bartlett(corMatrix, n = sum(complete.cases(theData)))$p.value > 0.05

## [1] FALSE

library(matrixcalc)

# Check whether the correlation matrix is singular; prints TRUE or FALSE.
is.singular.matrix(corMatrix)

## [1] FALSE

# Parallel analysis on the raw data to suggest how many factors to extract
# (factor-analysis eigenvalues only, no scree plot drawn).
fa.parallel(
  theData,
  fa = "fa",
  correct = TRUE,
  plot = FALSE
)

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA

# Exploratory factor analysis with three factors, minimum-residual extraction
# (fm = "minres"), the cor = "mixed" correlation option and a varimax rotation.
# GPArotation is not attached at this point (its library() call was left
# commented out); oblimin was being considered as an alternative rotation.
# NOTE(review): the parallel analysis output above suggests 2 factors while 3
# are requested here -- confirm this is intentional.
resfa <- fa(
  theData,
  nfactors = 3,
  rotate = "varimax",
  fm = "minres",
  cor = "mixed"
)

## Warning in cor.smooth(model): Matrix was not positive definite, smoothing was
## done

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27128e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27122e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27067e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27039e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27039e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27128e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27074e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27048e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27036e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27033e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

# Print the factor loadings of the varimax solution together with the
# variance summary per factor.
print(resfa$loadings)

## 
## Loadings:
##             MR1    MR2    MR3   
## value.x      0.963              
## co2          0.979  0.202       
## value.y      0.257  0.946       
## kW           0.969  0.253       
## BTU                 0.308       
## porcentaje                      
## GDP                 0.284  0.113
## bbl          0.692  0.597       
## value.data1  0.907  0.339       
## value.data2  0.903              
## porcentaje2        -0.130  0.988
## 
##                  MR1   MR2   MR3
## SS loadings    5.015 1.669 1.001
## Proportion Var 0.456 0.152 0.091
## Cumulative Var 0.456 0.608 0.699

# Print the same loadings again, blanking out the entries smaller than 0.5 in
# absolute value so the dominant loadings stand out.
print(resfa$loadings,cutoff = 0.5)

## 
## Loadings:
##             MR1    MR2    MR3   
## value.x      0.963              
## co2          0.979              
## value.y             0.946       
## kW           0.969              
## BTU                             
## porcentaje                      
## GDP                             
## bbl          0.692  0.597       
## value.data1  0.907              
## value.data2  0.903              
## porcentaje2                0.988
## 
##                  MR1   MR2   MR3
## SS loadings    5.015 1.669 1.001
## Proportion Var 0.456 0.152 0.091
## Cumulative Var 0.456 0.608 0.699

# Draw the diagram of the factor solution.
fa.diagram(resfa, main = "Resultados del EFA")

# Communality of each variable, in increasing order.
sort(resfa[["communality"]])

##  porcentaje         GDP         BTU value.data2         bbl     value.x 
## 0.000991509 0.094628887 0.107103202 0.816458070 0.835707988 0.932449076 
## value.data1     value.y porcentaje2         co2          kW 
## 0.937750818 0.962741401 0.995312984 0.999011831 1.003077347

# Complexity of each variable in the factor solution, in increasing order.
sort(resfa$complexity)

## value.data2     value.x porcentaje2  porcentaje         co2          kW 
##    1.000474    1.012238    1.039826    1.075632    1.084642    1.135500 
##     value.y         BTU value.data1         GDP         bbl 
##    1.149599    1.254178    1.273787    1.337513    1.961335

library(GPArotation)

## Warning: package 'GPArotation' was built under R version 4.3.3

## 
## Attaching package: 'GPArotation'

## The following objects are masked from 'package:psych':
## 
##     equamax, varimin

# Same factor model as `resfa` (three factors, minimum-residual extraction,
# cor = "mixed"), but with an oblimin rotation instead of varimax.
resfa1 <- fa(
  theData,
  nfactors = 3,
  rotate = "oblimin",
  fm = "minres",
  cor = "mixed"
)

## Warning in cor.smooth(model): Matrix was not positive definite, smoothing was
## done

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27128e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27122e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27067e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27039e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27039e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27128e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27074e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27049e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27036e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27033e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27032e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=9.27128e+09, f=25, theta=9.27031e+09, ..): not converged in 1000000
## iter.

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

# Print the loading matrix stored in the fitted factor model `resfa1`.
print(resfa1[["loadings"]])

## 
## Loadings:
##             MR1    MR2    MR3   
## value.x      1.004 -0.102       
## co2          0.983              
## value.y             0.982       
## kW           0.958              
## BTU                 0.312 -0.112
## porcentaje                      
## GDP                 0.317       
## bbl          0.561  0.521       
## value.data1  0.866  0.199       
## value.data2  0.966 -0.182       
## porcentaje2                0.997
## 
##                  MR1   MR2   MR3
## SS loadings    4.895 1.528 1.019
## Proportion Var 0.445 0.139 0.093
## Cumulative Var 0.445 0.584 0.676

# Print the same loading matrix, blanking out every loading whose absolute
# value is below 0.5 so that only the dominant variable-factor links remain.
print(resfa1[["loadings"]], cutoff = 0.5)

## 
## Loadings:
##             MR1    MR2    MR3   
## value.x      1.004              
## co2          0.983              
## value.y             0.982       
## kW           0.958              
## BTU                             
## porcentaje                      
## GDP                             
## bbl          0.561  0.521       
## value.data1  0.866              
## value.data2  0.966              
## porcentaje2                0.997
## 
##                  MR1   MR2   MR3
## SS loadings    4.895 1.528 1.019
## Proportion Var 0.445 0.139 0.093
## Cumulative Var 0.445 0.584 0.676

# Draw the path diagram for `resfa1`, the same fitted factor-analysis object
# whose loadings and communalities are printed in this section. The call
# previously received `resfa`, a different object, so the plot did not
# correspond to the printed results.
fa.diagram(resfa1, main = "Resultados del EFA")

# List the communalities of `resfa1` from smallest to largest.
sort(resfa1$communality, decreasing = FALSE)

##  porcentaje         GDP         BTU value.data2         bbl     value.x 
## 0.000991509 0.094628887 0.107103202 0.816458070 0.835707988 0.932449076 
## value.data1     value.y porcentaje2         co2          kW 
## 0.937750818 0.962741401 0.995312984 0.999011831 1.003077347

Pregunta 2

library(dplyr)

# Drop every row that has a missing value in any column, then keep only the
# identifier column and the three indicators used for clustering.
# NOTE(review): completeness is checked over all columns of merged_df9, not
# only the four retained below - confirm that this is intended.
merged_df9 <- merged_df9[complete.cases(merged_df9), ] %>%
  select(name, value.y, kW, BTU)

#install.packages("BBmisc")
library(BBmisc)

## Warning: package 'BBmisc' was built under R version 4.3.3

## 
## Attaching package: 'BBmisc'

## The following objects are masked from 'package:dplyr':
## 
##     coalesce, collapse, symdiff

## The following object is masked from 'package:base':
## 
##     isFALSE

# Standardize the three indicator columns (positions 2 to 4) in place.
merged_df9[, 2:4] <- normalize(merged_df9[, 2:4], method = "standardize")

# Clustering input: the standardized indicators, with each row labelled by
# the value in the `name` column.
dataClus <- merged_df9[, 2:4]
row.names(dataClus) <- merged_df9$name

library(cluster)
# Pairwise dissimilarities between the rows of dataClus (Gower metric).
g.dist <- daisy(dataClus, metric = "gower")

library(factoextra)

## Warning: package 'factoextra' was built under R version 4.3.3

## Loading required package: ggplot2

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

# Plot the gap statistic for 1 to 10 clusters, using hcut (with agnes) as the
# clustering function, to help choose the number of clusters.
fviz_nbclust(
  dataClus,
  hcut,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "agnes"
)

library(factoextra)
set.seed(123)

# Hierarchical clustering via agnes on the Gower dissimilarities, cut into
# 5 groups.
res.agnes <- hcut(
  g.dist,
  k = 5,
  hc_func = "agnes",
  hc_method = "ward.D"
)

# Store each row's cluster label next to its indicators.
dataClus$agnes <- res.agnes$cluster

# Dendrogram of the agnes solution, drawn horizontally and without a title.
fviz_dend(
  res.agnes,
  cex = 0.7,
  horiz = TRUE,
  main = ""
)

## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Silhouette plot for the agnes solution (summary table not printed).
fviz_silhouette(res.agnes, print.summary = FALSE)

# Per-row silhouette widths as a data frame, with the row labels copied into
# a `country` column.
silAGNES <- data.frame(res.agnes$silinfo$widths)
silAGNES$country <- row.names(silAGNES)

# Rows with a negative silhouette width, in alphabetical order.
poorAGNES <- sort(silAGNES[silAGNES$sil_width < 0, "country"])
poorAGNES

## [1] "Bermuda"      "Czechia"      "Italy"        "Seychelles"   "Turkmenistan"

# Mean of every indicator within each agnes cluster.
aggregate(. ~ agnes, data = dataClus, FUN = mean)

##   agnes     value.y          kW        BTU
## 1     1 -0.23245756 -0.14616331 -0.3888734
## 2     2  0.76604965  0.14700723  1.0002616
## 3     3  0.05465014 -0.04406216  3.6555764
## 4     4  0.82009151 11.35044751  0.1487211
## 5     5 10.58939909  5.74313859  1.8638180

slckd

2024-06-12