EDA_F

# Instalar plotly si aún no está instalado
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}

# Instalación de librerías
#install.packages("ggplot2")
#install.packages("reshape2")
##install.packages("dplyr")
#install.packages("data.table")

# Carga de librerías
library(ggplot2)
library(reshape2)
library(dplyr)

## 
## Adjuntando el paquete: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(data.table)

## 
## Adjuntando el paquete: 'data.table'

## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

## The following objects are masked from 'package:reshape2':
## 
##     dcast, melt

# Lectura del archivo CSV
energia_sus <- fread("global_data_sustainable_energy.csv")

# Visualización de los primeros y últimos 5 datos
head(energia_sus)

##         Entity  Year Access to electricity (% of population)
##         <char> <int>                                   <num>
## 1: Afghanistan  2000                                1.613591
## 2: Afghanistan  2001                                4.074574
## 3: Afghanistan  2002                                9.409158
## 4: Afghanistan  2003                               14.738506
## 5: Afghanistan  2004                               20.064968
## 6: Afghanistan  2005                               25.390894
##    Access to clean fuels for cooking
##                                <num>
## 1:                               6.2
## 2:                               7.2
## 3:                               8.2
## 4:                               9.5
## 5:                              10.9
## 6:                              12.2
##    Renewable-electricity-generating-capacity-per-capita
##                                                   <num>
## 1:                                                 9.22
## 2:                                                 8.86
## 3:                                                 8.47
## 4:                                                 8.09
## 5:                                                 7.75
## 6:                                                 7.51
##    Financial flows to developing countries (US $)
##                                             <i64>
## 1:                                          20000
## 2:                                         130000
## 3:                                        3950000
## 4:                                       25970000
## 5:                                           <NA>
## 6:                                        9830000
##    Renewable energy share in the total final energy consumption (%)
##                                                               <num>
## 1:                                                            44.99
## 2:                                                            45.60
## 3:                                                            37.83
## 4:                                                            36.66
## 5:                                                            44.24
## 6:                                                            33.88
##    Electricity from fossil fuels (TWh) Electricity from nuclear (TWh)
##                                  <num>                          <num>
## 1:                                0.16                              0
## 2:                                0.09                              0
## 3:                                0.13                              0
## 4:                                0.31                              0
## 5:                                0.33                              0
## 6:                                0.34                              0
##    Electricity from renewables (TWh) Low-carbon electricity (% electricity)
##                                <num>                                  <num>
## 1:                              0.31                               65.95744
## 2:                              0.50                               84.74577
## 3:                              0.56                               81.15942
## 4:                              0.63                               67.02128
## 5:                              0.56                               62.92135
## 6:                              0.59                               63.44086
##    Primary energy consumption per capita (kWh/person)
##                                                 <num>
## 1:                                           302.5948
## 2:                                           236.8919
## 3:                                           210.8622
## 4:                                           229.9682
## 5:                                           204.2312
## 6:                                           252.0691
##    Energy intensity level of primary energy (MJ/$2017 PPP GDP)
##                                                          <num>
## 1:                                                        1.64
## 2:                                                        1.74
## 3:                                                        1.40
## 4:                                                        1.40
## 5:                                                        1.20
## 6:                                                        1.41
##    Value_co2_emissions_kt_by_country Renewables (% equivalent primary energy)
##                                <num>                                    <num>
## 1:                               760                                       NA
## 2:                               730                                       NA
## 3:                              1030                                       NA
## 4:                              1220                                       NA
## 5:                              1030                                       NA
## 6:                              1550                                       NA
##    gdp_growth gdp_per_capita Density\\n(P/Km2) Land Area(Km2) Latitude
##         <num>          <num>            <char>          <int>    <num>
## 1:         NA             NA                60         652230 33.93911
## 2:         NA             NA                60         652230 33.93911
## 3:         NA       179.4266                60         652230 33.93911
## 4:   8.832278       190.6838                60         652230 33.93911
## 5:   1.414118       211.3821                60         652230 33.93911
## 6:  11.229715       242.0313                60         652230 33.93911
##    Longitude
##        <num>
## 1:  67.70995
## 2:  67.70995
## 3:  67.70995
## 4:  67.70995
## 5:  67.70995
## 6:  67.70995

tail(energia_sus)

##      Entity  Year Access to electricity (% of population)
##      <char> <int>                                   <num>
## 1: Zimbabwe  2015                                33.70000
## 2: Zimbabwe  2016                                42.56173
## 3: Zimbabwe  2017                                44.17863
## 4: Zimbabwe  2018                                45.57265
## 5: Zimbabwe  2019                                46.78148
## 6: Zimbabwe  2020                                52.74767
##    Access to clean fuels for cooking
##                                <num>
## 1:                              29.5
## 2:                              29.8
## 3:                              29.8
## 4:                              29.9
## 5:                              30.1
## 6:                              30.4
##    Renewable-electricity-generating-capacity-per-capita
##                                                   <num>
## 1:                                                63.54
## 2:                                                62.88
## 3:                                                62.33
## 4:                                                82.53
## 5:                                                81.40
## 6:                                                80.61
##    Financial flows to developing countries (US $)
##                                             <i64>
## 1:                                           <NA>
## 2:                                          30000
## 3:                                        5570000
## 4:                                          10000
## 5:                                         250000
## 6:                                          30000
##    Renewable energy share in the total final energy consumption (%)
##                                                               <num>
## 1:                                                            80.82
## 2:                                                            81.90
## 3:                                                            82.46
## 4:                                                            80.23
## 5:                                                            81.50
## 6:                                                            81.90
##    Electricity from fossil fuels (TWh) Electricity from nuclear (TWh)
##                                  <num>                          <num>
## 1:                                4.02                              0
## 2:                                3.50                              0
## 3:                                3.05                              0
## 4:                                3.73                              0
## 5:                                3.66                              0
## 6:                                3.40                              0
##    Electricity from renewables (TWh) Low-carbon electricity (% electricity)
##                                <num>                                  <num>
## 1:                              5.37                               57.18850
## 2:                              3.32                               48.68035
## 3:                              4.30                               58.50341
## 4:                              5.46                               59.41241
## 5:                              4.58                               55.58253
## 6:                              4.19                               55.20422
##    Primary energy consumption per capita (kWh/person)
##                                                 <num>
## 1:                                           3860.920
## 2:                                           3227.680
## 3:                                           3068.012
## 4:                                           3441.986
## 5:                                           3003.655
## 6:                                           2680.132
##    Energy intensity level of primary energy (MJ/$2017 PPP GDP)
##                                                          <num>
## 1:                                                       10.36
## 2:                                                       10.00
## 3:                                                        9.51
## 4:                                                        9.83
## 5:                                                       10.47
## 6:                                                       10.00
##    Value_co2_emissions_kt_by_country Renewables (% equivalent primary energy)
##                                <num>                                    <num>
## 1:                             12430                                       NA
## 2:                             11020                                       NA
## 3:                             10340                                       NA
## 4:                             12380                                       NA
## 5:                             11760                                       NA
## 6:                                NA                                       NA
##    gdp_growth gdp_per_capita Density\\n(P/Km2) Land Area(Km2)  Latitude
##         <num>          <num>            <char>          <int>     <num>
## 1:  1.7798727       1445.070                38         390757 -19.01544
## 2:  0.7558693       1464.589                38         390757 -19.01544
## 3:  4.7094922       1235.189                38         390757 -19.01544
## 4:  4.8242105       1254.642                38         390757 -19.01544
## 5: -6.1442363       1316.741                38         390757 -19.01544
## 6: -6.2487482       1214.510                38         390757 -19.01544
##    Longitude
##        <num>
## 1:  29.15486
## 2:  29.15486
## 3:  29.15486
## 4:  29.15486
## 5:  29.15486
## 6:  29.15486

# Información de la base de datos
str(energia_sus)

## Classes 'data.table' and 'data.frame':   3649 obs. of  21 variables:
##  $ Entity                                                          : chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ Year                                                            : int  2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 ...
##  $ Access to electricity (% of population)                         : num  1.61 4.07 9.41 14.74 20.06 ...
##  $ Access to clean fuels for cooking                               : num  6.2 7.2 8.2 9.5 10.9 ...
##  $ Renewable-electricity-generating-capacity-per-capita            : num  9.22 8.86 8.47 8.09 7.75 7.51 7.4 7.25 7.49 7.5 ...
##  $ Financial flows to developing countries (US $)                  :integer64 20000 130000 3950000 25970000 NA 9830000 10620000 15750000 ... 
##  $ Renewable energy share in the total final energy consumption (%): num  45 45.6 37.8 36.7 44.2 ...
##  $ Electricity from fossil fuels (TWh)                             : num  0.16 0.09 0.13 0.31 0.33 0.34 0.2 0.2 0.19 0.16 ...
##  $ Electricity from nuclear (TWh)                                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Electricity from renewables (TWh)                               : num  0.31 0.5 0.56 0.63 0.56 0.59 0.64 0.75 0.54 0.78 ...
##  $ Low-carbon electricity (% electricity)                          : num  66 84.7 81.2 67 62.9 ...
##  $ Primary energy consumption per capita (kWh/person)              : num  303 237 211 230 204 ...
##  $ Energy intensity level of primary energy (MJ/$2017 PPP GDP)     : num  1.64 1.74 1.4 1.4 1.2 1.41 1.5 1.53 1.94 2.25 ...
##  $ Value_co2_emissions_kt_by_country                               : num  760 730 1030 1220 1030 ...
##  $ Renewables (% equivalent primary energy)                        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gdp_growth                                                      : num  NA NA NA 8.83 1.41 ...
##  $ gdp_per_capita                                                  : num  NA NA 179 191 211 ...
##  $ Density\n(P/Km2)                                               : chr  "60" "60" "60" "60" ...
##  $ Land Area(Km2)                                                  : int  652230 652230 652230 652230 652230 652230 652230 652230 652230 652230 ...
##  $ Latitude                                                        : num  33.9 33.9 33.9 33.9 33.9 ...
##  $ Longitude                                                       : num  67.7 67.7 67.7 67.7 67.7 ...
##  - attr(*, ".internal.selfref")=<externalptr>

# Dimensión de la base de datos
dimensiones <- dim(energia_sus)
print(dimensiones)

## [1] 3649   21

# Estadísticas básicas: media, desviación estándar, min, max, percentiles (25, 50, 75)
summary(energia_sus)

##     Entity               Year      Access to electricity (% of population)
##  Length:3649        Min.   :2000   Min.   :  1.252                        
##  Class :character   1st Qu.:2005   1st Qu.: 59.801                        
##  Mode  :character   Median :2010   Median : 98.362                        
##                     Mean   :2010   Mean   : 78.934                        
##                     3rd Qu.:2015   3rd Qu.:100.000                        
##                     Max.   :2020   Max.   :100.000                        
##                                    NA's   :10                             
##  Access to clean fuels for cooking
##  Min.   :  0.00                   
##  1st Qu.: 23.18                   
##  Median : 83.15                   
##  Mean   : 63.26                   
##  3rd Qu.:100.00                   
##  Max.   :100.00                   
##  NA's   :169                      
##  Renewable-electricity-generating-capacity-per-capita
##  Min.   :   0.00                                     
##  1st Qu.:   3.54                                     
##  Median :  32.91                                     
##  Mean   : 113.14                                     
##  3rd Qu.: 112.21                                     
##  Max.   :3060.19                                     
##  NA's   :931                                         
##  Financial flows to developing countries (US $)
##  Min.   :         0                            
##  1st Qu.:    260000                            
##  Median :   5660000                            
##  Mean   :  94224000                            
##  3rd Qu.:  55290000                            
##  Max.   :5202310000                            
##  NA's   :      2089                            
##  Renewable energy share in the total final energy consumption (%)
##  Min.   : 0.000                                                  
##  1st Qu.: 6.515                                                  
##  Median :23.300                                                  
##  Mean   :32.638                                                  
##  3rd Qu.:55.245                                                  
##  Max.   :96.040                                                  
##  NA's   :194                                                     
##  Electricity from fossil fuels (TWh) Electricity from nuclear (TWh)
##  Min.   :   0.00                     Min.   :  0.00                
##  1st Qu.:   0.29                     1st Qu.:  0.00                
##  Median :   2.97                     Median :  0.00                
##  Mean   :  70.36                     Mean   : 13.45                
##  3rd Qu.:  26.84                     3rd Qu.:  0.00                
##  Max.   :5184.13                     Max.   :809.41                
##  NA's   :21                          NA's   :126                   
##  Electricity from renewables (TWh) Low-carbon electricity (% electricity)
##  Min.   :   0.00                   Min.   :  0.000                       
##  1st Qu.:   0.04                   1st Qu.:  2.878                       
##  Median :   1.47                   Median : 27.865                       
##  Mean   :  23.97                   Mean   : 36.801                       
##  3rd Qu.:   9.60                   3rd Qu.: 64.404                       
##  Max.   :2184.94                   Max.   :100.000                       
##  NA's   :21                        NA's   :42                            
##  Primary energy consumption per capita (kWh/person)
##  Min.   :     0                                    
##  1st Qu.:  3117                                    
##  Median : 13121                                    
##  Mean   : 25744                                    
##  3rd Qu.: 33893                                    
##  Max.   :262586                                    
##                                                    
##  Energy intensity level of primary energy (MJ/$2017 PPP GDP)
##  Min.   : 0.110                                             
##  1st Qu.: 3.170                                             
##  Median : 4.300                                             
##  Mean   : 5.307                                             
##  3rd Qu.: 6.027                                             
##  Max.   :32.570                                             
##  NA's   :207                                                
##  Value_co2_emissions_kt_by_country Renewables (% equivalent primary energy)
##  Min.   :      10                  Min.   : 0.000                          
##  1st Qu.:    2020                  1st Qu.: 2.137                          
##  Median :   10500                  Median : 6.291                          
##  Mean   :  159866                  Mean   :11.987                          
##  3rd Qu.:   60580                  3rd Qu.:16.842                          
##  Max.   :10707220                  Max.   :86.837                          
##  NA's   :428                       NA's   :2137                            
##    gdp_growth      gdp_per_capita     Density\\n(P/Km2)  Land Area(Km2)   
##  Min.   :-62.076   Min.   :   111.9   Length:3649        Min.   :     21  
##  1st Qu.:  1.383   1st Qu.:  1337.8   Class :character   1st Qu.:  25713  
##  Median :  3.560   Median :  4578.6   Mode  :character   Median : 117600  
##  Mean   :  3.442   Mean   : 13283.8                      Mean   : 633213  
##  3rd Qu.:  5.830   3rd Qu.: 15768.6                      3rd Qu.: 513120  
##  Max.   :123.140   Max.   :123514.2                      Max.   :9984670  
##  NA's   :317       NA's   :282                           NA's   :1        
##     Latitude         Longitude      
##  Min.   :-40.901   Min.   :-175.20  
##  1st Qu.:  3.203   1st Qu.: -11.78  
##  Median : 17.190   Median :  19.15  
##  Mean   : 18.246   Mean   :  14.82  
##  3rd Qu.: 38.970   3rd Qu.:  46.20  
##  Max.   : 64.963   Max.   : 178.07  
##  NA's   :1         NA's   :1

# Conteo de valores NaN (NA en R) en todo el DataFrame
nan_counts <- sapply(energia_sus, function(x) sum(is.na(x)))
cat("Número de valores NaN por columna:\n")

## Número de valores NaN por columna:

print(nan_counts)

##                                                           Entity 
##                                                                0 
##                                                             Year 
##                                                                0 
##                          Access to electricity (% of population) 
##                                                               10 
##                                Access to clean fuels for cooking 
##                                                              169 
##             Renewable-electricity-generating-capacity-per-capita 
##                                                              931 
##                   Financial flows to developing countries (US $) 
##                                                             2089 
## Renewable energy share in the total final energy consumption (%) 
##                                                              194 
##                              Electricity from fossil fuels (TWh) 
##                                                               21 
##                                   Electricity from nuclear (TWh) 
##                                                              126 
##                                Electricity from renewables (TWh) 
##                                                               21 
##                           Low-carbon electricity (% electricity) 
##                                                               42 
##               Primary energy consumption per capita (kWh/person) 
##                                                                0 
##      Energy intensity level of primary energy (MJ/$2017 PPP GDP) 
##                                                              207 
##                                Value_co2_emissions_kt_by_country 
##                                                              428 
##                         Renewables (% equivalent primary energy) 
##                                                             2137 
##                                                       gdp_growth 
##                                                              317 
##                                                   gdp_per_capita 
##                                                              282 
##                                                Density\\n(P/Km2) 
##                                                                0 
##                                                   Land Area(Km2) 
##                                                                1 
##                                                         Latitude 
##                                                                1 
##                                                        Longitude 
##                                                                1

# Total de valores NaN en el DataFrame
total_nan <- sum(nan_counts)
cat("\nTotal de valores NaN en el DataFrame:", total_nan, "\n")

## 
## Total de valores NaN en el DataFrame: 6977

# Nombres de las columnas
print(names(energia_sus))

##  [1] "Entity"                                                          
##  [2] "Year"                                                            
##  [3] "Access to electricity (% of population)"                         
##  [4] "Access to clean fuels for cooking"                               
##  [5] "Renewable-electricity-generating-capacity-per-capita"            
##  [6] "Financial flows to developing countries (US $)"                  
##  [7] "Renewable energy share in the total final energy consumption (%)"
##  [8] "Electricity from fossil fuels (TWh)"                             
##  [9] "Electricity from nuclear (TWh)"                                  
## [10] "Electricity from renewables (TWh)"                               
## [11] "Low-carbon electricity (% electricity)"                          
## [12] "Primary energy consumption per capita (kWh/person)"              
## [13] "Energy intensity level of primary energy (MJ/$2017 PPP GDP)"     
## [14] "Value_co2_emissions_kt_by_country"                               
## [15] "Renewables (% equivalent primary energy)"                        
## [16] "gdp_growth"                                                      
## [17] "gdp_per_capita"                                                  
## [18] "Density\\n(P/Km2)"                                               
## [19] "Land Area(Km2)"                                                  
## [20] "Latitude"                                                        
## [21] "Longitude"

# Renombrar columnas
energia_sus <- energia_sus %>% 
  rename(CO2 = Value_co2_emissions_kt_by_country, Country = Entity)

# Seleccionar columnas de interés para el boxplot
variables <- c(
  'Access to electricity (% of population)',
  'Access to clean fuels for cooking',
  'Renewable-electricity-generating-capacity-per-capita',
  'Financial flows to developing countries (US $)',
  'Renewable energy share in the total final energy consumption (%)',
  'Electricity from fossil fuels (TWh)',
  'Electricity from nuclear (TWh)',
  'Electricity from renewables (TWh)',
  'Low-carbon electricity (% electricity)',
  'Primary energy consumption per capita (kWh/person)',
  'Energy intensity level of primary energy (MJ/$2017 PPP GDP)',
  'CO2',
  'Renewables (% equivalent primary energy)',
  'gdp_growth',
  'gdp_per_capita',
  'Land Area(Km2)'
)

# Filtrar las columnas seleccionadas
df_selected <- energia_sus %>% select(all_of(variables))

# Derretir el DataFrame para el gráfico de caja
df_melted <- melt(df_selected, variable.name = 'Variable', value.name = 'Value')

## Warning in melt.data.table(df_selected, variable.name = "Variable", value.name
## = "Value"): id.vars and measure.vars are internally guessed when both are
## 'NULL'. All non-numeric/integer/logical type columns are considered id.vars,
## which in this case are columns []. Consider providing at least one of 'id' or
## 'measure' vars in future.

## Warning in melt.data.table(df_selected, variable.name = "Variable", value.name
## = "Value"): 'measure.vars' [Access to electricity (% of population), Access to
## clean fuels for cooking, Renewable-electricity-generating-capacity-per-capita,
## Financial flows to developing countries (US $), ...] are not all of the same
## type. By order of hierarchy, the molten data value column will be of type
## 'double'. All measure variables not of type 'double' will be coerced too. Check
## DETAILS in ?melt.data.table for more on coercion.

library(ggplot2)
library(reshape2)  # Para melt()

# Supongamos que tienes un data frame df_melted ya creado que contiene las columnas "Value" y "Variable"
# Crear el gráfico de caja
ggplot(df_melted, aes(x = Value, y = Variable)) +
  geom_boxplot(outlier.colour = "red", outlier.size = 1, fill = "lightblue") +
  geom_jitter(color = "black", size = 0.5, alpha = 0.5) +
  labs(title = "Distribución de Variables de Energía y Desarrollo",
       x = "Valores",
       y = "Variables") +
  theme_minimal()

## Warning: Removed 4886 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 4886 rows containing missing values or values outside the scale range
## (`geom_point()`).

## **Boxplot Individual para la variable 'Financial flows to developing countries (US $)'**
# Filtrar la columna específica
df_financial_flows <- energia_sus[, "Financial flows to developing countries (US $)"]

# Cargar la librería
library(plotly)

## 
## Adjuntando el paquete: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Crear el gráfico de caja
fig <- plot_ly(
  data = df_financial_flows, 
  x = ~`Financial flows to developing countries (US $)`, 
  type = "box",
  title = 'Distribución de los Flujos Financieros hacia los Países en Desarrollo (US $)',
  xaxis = list(title = 'Flujos Financieros (US $)')
)

# Mostrar el gráfico
fig

## Warning: 'box' objects don't have these attributes: 'title'
## Valid attributes include:
## 'alignmentgroup', 'boxmean', 'boxpoints', 'customdata', 'customdatasrc', 'dx', 'dy', 'fillcolor', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'jitter', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'lowerfence', 'lowerfencesrc', 'marker', 'mean', 'meansrc', 'median', 'mediansrc', 'meta', 'metasrc', 'name', 'notched', 'notchspan', 'notchspansrc', 'notchwidth', 'offsetgroup', 'opacity', 'orientation', 'pointpos', 'q1', 'q1src', 'q3', 'q3src', 'quartilemethod', 'sd', 'sdsrc', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textsrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'upperfence', 'upperfencesrc', 'visible', 'whiskerwidth', 'width', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## **Histograma de los flujos financieros para los países en desarrollo**
# Crear el histograma
fig_hist <- plot_ly(
  data = df_financial_flows, 
  x = ~`Financial flows to developing countries (US $)`, 
  type = "histogram", 
  nbinsx = 100,
  title = 'Histograma de los Flujos Financieros hacia los Países en Desarrollo (US $)',
  xaxis = list(title = 'Flujos Financieros (US $)')
)
fig_hist

## Warning: 'histogram' objects don't have these attributes: 'title'
## Valid attributes include:
## '_deprecated', 'alignmentgroup', 'autobinx', 'autobiny', 'bingroup', 'cliponaxis', 'constraintext', 'cumulative', 'customdata', 'customdatasrc', 'error_x', 'error_y', 'histfunc', 'histnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'insidetextanchor', 'insidetextfont', 'legendgroup', 'legendgrouptitle', 'legendrank', 'marker', 'meta', 'metasrc', 'name', 'nbinsx', 'nbinsy', 'offsetgroup', 'opacity', 'orientation', 'outsidetextfont', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textangle', 'textfont', 'textposition', 'textsrc', 'texttemplate', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'xaxis', 'xbins', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'yaxis', 'ybins', 'ycalendar', 'yhoverformat', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## **Boxplot para el valor de las emisiones de CO2 en Kt por país**
# Filtrar la columna específica
df_box_co2 <- energia_sus %>% select(CO2)

# Crear el gráfico de caja para la variable seleccionada
fig_co2 <- plot_ly(
  data = df_box_co2, 
  x = ~CO2, 
  type = "box",
  title = 'Valor de las emisiones de CO2 en kt por país',
  xaxis = list(title = 'Valor de las emisiones de CO2 en kt por país')
)
fig_co2

## Warning: Ignoring 428 observations
## Warning: 'box' objects don't have these attributes: 'title'
## Valid attributes include:
## 'alignmentgroup', 'boxmean', 'boxpoints', 'customdata', 'customdatasrc', 'dx', 'dy', 'fillcolor', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'jitter', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'lowerfence', 'lowerfencesrc', 'marker', 'mean', 'meansrc', 'median', 'mediansrc', 'meta', 'metasrc', 'name', 'notched', 'notchspan', 'notchspansrc', 'notchwidth', 'offsetgroup', 'opacity', 'orientation', 'pointpos', 'q1', 'q1src', 'q3', 'q3src', 'quartilemethod', 'sd', 'sdsrc', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textsrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'upperfence', 'upperfencesrc', 'visible', 'whiskerwidth', 'width', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## **Histograma del valor de las emisiones de CO2 en Kt por país**
# Crear el histograma
fig_hist_co2 <- plot_ly(
  data = df_box_co2, 
  x = ~CO2, 
  type = "histogram", 
  nbinsx = 100,
  title = 'Valor de las emisiones de CO2 en kt por país',
  xaxis = list(title = 'Valor de las emisiones de CO2 en kt por país')
)
fig_hist_co2

## Warning: Ignoring 428 observations

## Warning: 'histogram' objects don't have these attributes: 'title'
## Valid attributes include:
## '_deprecated', 'alignmentgroup', 'autobinx', 'autobiny', 'bingroup', 'cliponaxis', 'constraintext', 'cumulative', 'customdata', 'customdatasrc', 'error_x', 'error_y', 'histfunc', 'histnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'insidetextanchor', 'insidetextfont', 'legendgroup', 'legendgrouptitle', 'legendrank', 'marker', 'meta', 'metasrc', 'name', 'nbinsx', 'nbinsy', 'offsetgroup', 'opacity', 'orientation', 'outsidetextfont', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textangle', 'textfont', 'textposition', 'textsrc', 'texttemplate', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'xaxis', 'xbins', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'yaxis', 'ybins', 'ycalendar', 'yhoverformat', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## **Coeficiente de variación**
# Excluir las columnas específicas que no quieres analizar
columnas_excluir <- c('Year', 'Land Area(Km2)', 'Latitude', 'Longitude')

# Seleccionar solo las columnas numéricas excluyendo las que no queremos
columnas_incluidas <- setdiff(names(energia_sus)[sapply(energia_sus, is.numeric)], columnas_excluir)

# Calcular la media, desviación estándar y coeficiente de variación para las columnas seleccionadas
for (column in columnas_incluidas) {
  media <- mean(energia_sus[[column]], na.rm = TRUE)
  desviacion <- sd(energia_sus[[column]], na.rm = TRUE)
  
  # Evitar la división por cero
  if (media != 0) {
    coef_variacion <- (desviacion / media) * 100
    cat("El coeficiente de variación para", column, "es:", round(coef_variacion, 2), "%\n")
  } else {
    cat("La media de la columna", column, "es cero, no se puede calcular el coeficiente de variación.\n")
  }
}

## El coeficiente de variación para Access to electricity (% of population) es: 38.36 %
## El coeficiente de variación para Access to clean fuels for cooking es: 61.72 %
## El coeficiente de variación para Renewable-electricity-generating-capacity-per-capita es: 215.81 %
## El coeficiente de variación para Financial flows to developing countries (US $) es: 316.43 %
## El coeficiente de variación para Renewable energy share in the total final energy consumption (%) es: 91.59 %
## El coeficiente de variación para Electricity from fossil fuels (TWh) es: 494.64 %
## El coeficiente de variación para Electricity from nuclear (TWh) es: 542.79 %
## El coeficiente de variación para Electricity from renewables (TWh) es: 435.71 %
## El coeficiente de variación para Low-carbon electricity (% electricity) es: 93.24 %
## El coeficiente de variación para Primary energy consumption per capita (kWh/person) es: 135.07 %
## El coeficiente de variación para Energy intensity level of primary energy (MJ/$2017 PPP GDP) es: 66.55 %
## El coeficiente de variación para CO2 es: 483.94 %
## El coeficiente de variación para Renewables (% equivalent primary energy) es: 125.09 %
## El coeficiente de variación para gdp_growth es: 165.23 %
## El coeficiente de variación para gdp_per_capita es: 148.38 %

## **Coeficiente de asimetría de Pearson**
asimetria_pearson <- list()

# Calcular la media, mediana, desviación estándar y coeficiente de asimetría de Pearson para cada columna numérica
for (column in columnas_incluidas) {
  media <- mean(energia_sus[[column]], na.rm = TRUE)
  mediana <- median(energia_sus[[column]], na.rm = TRUE)
  desviacion <- sd(energia_sus[[column]], na.rm = TRUE)
  
  # Evitar la división por cero
  if (desviacion != 0) {
    coef_asimetria <- (3 * (media - mediana)) / desviacion
    asimetria_pearson[[column]] <- coef_asimetria
    cat("El coeficiente de asimetría de Pearson para", column, "es:", round(coef_asimetria, 2), "\n")
  } else {
    cat("La desviación estándar de la columna", column, "es cero, no se puede calcular el coeficiente de asimetría.\n")
  }
}

## El coeficiente de asimetría de Pearson para Access to electricity (% of population) es: -1.93 
## El coeficiente de asimetría de Pearson para Access to clean fuels for cooking es: -1.53 
## El coeficiente de asimetría de Pearson para Renewable-electricity-generating-capacity-per-capita es: 0.99 
## El coeficiente de asimetría de Pearson para Financial flows to developing countries (US $) es: 0.89 
## El coeficiente de asimetría de Pearson para Renewable energy share in the total final energy consumption (%) es: 0.94 
## El coeficiente de asimetría de Pearson para Electricity from fossil fuels (TWh) es: 0.58 
## El coeficiente de asimetría de Pearson para Electricity from nuclear (TWh) es: 0.55 
## El coeficiente de asimetría de Pearson para Electricity from renewables (TWh) es: 0.65 
## El coeficiente de asimetría de Pearson para Low-carbon electricity (% electricity) es: 0.78 
## El coeficiente de asimetría de Pearson para Primary energy consumption per capita (kWh/person) es: 1.09 
## El coeficiente de asimetría de Pearson para Energy intensity level of primary energy (MJ/$2017 PPP GDP) es: 0.86 
## El coeficiente de asimetría de Pearson para CO2 es: 0.58 
## El coeficiente de asimetría de Pearson para Renewables (% equivalent primary energy) es: 1.14 
## El coeficiente de asimetría de Pearson para gdp_growth es: -0.06 
## El coeficiente de asimetría de Pearson para gdp_per_capita es: 1.32

# Mostrar los resultados como DataFrame
df_asimetria <- data.frame(
  Variable = names(asimetria_pearson),
  Coeficiente_Asimetria_de_Pearson = unlist(asimetria_pearson)
)
print(df_asimetria)

##                                                                                                                          Variable
## Access to electricity (% of population)                                                   Access to electricity (% of population)
## Access to clean fuels for cooking                                                               Access to clean fuels for cooking
## Renewable-electricity-generating-capacity-per-capita                         Renewable-electricity-generating-capacity-per-capita
## Financial flows to developing countries (US $)                                     Financial flows to developing countries (US $)
## Renewable energy share in the total final energy consumption (%) Renewable energy share in the total final energy consumption (%)
## Electricity from fossil fuels (TWh)                                                           Electricity from fossil fuels (TWh)
## Electricity from nuclear (TWh)                                                                     Electricity from nuclear (TWh)
## Electricity from renewables (TWh)                                                               Electricity from renewables (TWh)
## Low-carbon electricity (% electricity)                                                     Low-carbon electricity (% electricity)
## Primary energy consumption per capita (kWh/person)                             Primary energy consumption per capita (kWh/person)
## Energy intensity level of primary energy (MJ/$2017 PPP GDP)           Energy intensity level of primary energy (MJ/$2017 PPP GDP)
## CO2                                                                                                                           CO2
## Renewables (% equivalent primary energy)                                                 Renewables (% equivalent primary energy)
## gdp_growth                                                                                                             gdp_growth
## gdp_per_capita                                                                                                     gdp_per_capita
##                                                                  Coeficiente_Asimetria_de_Pearson
## Access to electricity (% of population)                                               -1.92510524
## Access to clean fuels for cooking                                                     -1.52865130
## Renewable-electricity-generating-capacity-per-capita                                   0.98572797
## Financial flows to developing countries (US $)                                         0.89112216
## Renewable energy share in the total final energy consumption (%)                       0.93709943
## Electricity from fossil fuels (TWh)                                                    0.58090482
## Electricity from nuclear (TWh)                                                         0.55269740
## Electricity from renewables (TWh)                                                      0.64630210
## Low-carbon electricity (% electricity)                                                 0.78124529
## Primary energy consumption per capita (kWh/person)                                     1.08906319
## Energy intensity level of primary energy (MJ/$2017 PPP GDP)                            0.85561098
## CO2                                                                                    0.57919341
## Renewables (% equivalent primary energy)                                               1.13959506
## gdp_growth                                                                            -0.06237959
## gdp_per_capita                                                                         1.32499239

#if (!require("tidyr")) install.packages("tidyr")
library(tidyr)

## 
## Adjuntando el paquete: 'tidyr'

## The following object is masked from 'package:reshape2':
## 
##     smiths

## **Valores Faltantes**
# Contar valores faltantes por variable
missing_data <- energia_sus %>%
  summarise(across(everything(), ~ sum(is.na(.)))) %>%
  pivot_longer(cols = everything(), names_to = "variable", values_to = "missing_count") 

# Crear el gráfico de valores faltantes
ggplot(missing_data, aes(x = reorder(variable, -missing_count), y = missing_count)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  coord_flip() +
  labs(title = "Cantidad de valores faltantes por variable", x = "Variables", y = "Cantidad de valores faltantes") +
  theme_minimal()

# Calcular el porcentaje de valores faltantes
missing_percentage <- energia_sus %>%
  summarise(across(everything(), ~ sum(is.na(.)) / nrow(energia_sus) * 100)) %>%
  pivot_longer(cols = everything(), names_to = "variable", values_to = "missing_percentage")

# Crear el gráfico de porcentaje de valores faltantes
ggplot(missing_percentage, aes(x = reorder(variable, -missing_percentage), y = missing_percentage)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  coord_flip() +
  labs(title = "Porcentaje de valores faltantes por variable", x = "Variables", y = "Porcentaje de valores faltantes") +
  theme_minimal()

# Gráfico de barras para visualizar el porcentaje de valores faltantes
ggplot(missing_percentage, aes(x = reorder(variable, -missing_percentage), y = missing_percentage)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  coord_flip() +
  labs(title = "Porcentaje de valores faltantes por variable en el DataFrame energia_sus",
       x = "Variables",
       y = "Porcentaje de valores faltantes") +
  theme_minimal()

# Mapa de calor del patrón de datos faltantes
library(ggplot2)
library(reshape2)

# Mapa de calor para ver patrones de valores faltantes
missing_matrix <- is.na(energia_sus)
missing_heatmap <- melt(missing_matrix)

## Warning: The melt generic in data.table has been passed a matrix and will
## attempt to redirect to the relevant reshape2 method; please note that reshape2
## is superseded and is no longer actively developed, and this redirection is now
## deprecated. To continue using melt methods from reshape2 while both libraries
## are attached, e.g. melt.list, you can prepend the namespace, i.e.
## reshape2::melt(missing_matrix). In the next version, this warning will become
## an error.

ggplot(missing_heatmap, aes(Var2, Var1, fill = value)) +
  geom_tile() +
  scale_fill_manual(values = c("white", "blue"), labels = c("Presente", "Faltante")) +
  labs(title = "Mapa de calor del patrón de datos faltantes en 'energia_sus'",
       x = "Variables",
       y = "Observaciones") +
  theme_minimal()

## **Mapa geo-referenciado del porcentaje de población por país que tiene acceso a la electricidad en 2014**
df_2014 <- filter(energia_sus, Year == 2014)

# Filtrar los datos para el año 2014
df_2014 <- energia_sus %>% filter(Year == 2014)

# Crear el mapa coroplético para el acceso a electricidad
fig <- plot_geo(df_2014) %>%
  add_trace(
    z = ~`Access to electricity (% of population)`,
    locations = ~Country,
    color = ~`Access to electricity (% of population)`,
    colors = "Blues",  # Cambiar 'Plasma' a 'Viridis'
    colorbar = list(title = "Acceso a electricidad (%)")
  ) %>%
  layout(title = "Porcentaje de población por país que tiene acceso a electricidad en 2014")

# Mostrar el gráfico
fig

## **Mapa geo-referenciado del porcentaje de población por país que tiene acceso a combustibles limpios 2014**
fig <- plot_geo(df_2014) %>%
  add_trace(
    z = ~`Access to clean fuels for cooking`,
    locations = ~Country,
    color = ~`Access to clean fuels for cooking`,
    colors = "Blues"
  ) %>%
  layout(title = "Porcentaje de población por país que tiene acceso a combustibles limpios para cocinar en 2014")
fig

## Warning: Ignoring 8 observations

## **Principales fuentes de electricidad en Colombia**
# Agrupar y transformar datos para Colombia
energia_agrupado <- energia_sus %>%
  group_by(Country) %>%
  summarise(
    `Electricity from fossil fuels (TWh)` = sum(`Electricity from fossil fuels (TWh)`, na.rm = TRUE),
    `Electricity from nuclear (TWh)` = sum(`Electricity from nuclear (TWh)`, na.rm = TRUE),
    `Electricity from renewables (TWh)` = sum(`Electricity from renewables (TWh)`, na.rm = TRUE)
  )

df_colombia <- filter(energia_agrupado, Country == "Colombia") %>%
  pivot_longer(-Country, names_to = "Source", values_to = "Electricity (TWh)")

# Gráfico de pastel
fig <- plot_ly(df_colombia, labels = ~Source, values = ~`Electricity (TWh)`, type = 'pie') %>%
  layout(title = 'Principales fuentes de electricidad en Colombia')
fig

## **Países que usan energías renovables**
fig <- plot_geo(energia_sus) %>%
  add_trace(
    z = ~`Electricity from renewables (TWh)`,
    locations = ~Country,
    color = ~`Electricity from renewables (TWh)`,
    colors = "Blues"
  ) %>%
  layout(title = "Países que usan energías renovables")
fig

## Warning: Ignoring 21 observations

## **Países que usan energía nuclear**
fig <- plot_geo(energia_sus) %>%
  add_trace(
    z = ~`Electricity from nuclear (TWh)`,
    locations = ~Country,
    color = ~`Electricity from nuclear (TWh)`,
    colors = "Blues"
  ) %>%
  layout(title = "Países que usan energía nuclear")
fig

## Warning: Ignoring 126 observations

## **Países que usan energía de combustibles fósiles**
fig <- plot_geo(energia_sus) %>%
  add_trace(
    z = ~`Electricity from fossil fuels (TWh)`,
    locations = ~Country,
    color = ~`Electricity from fossil fuels (TWh)`,
    colors = "Blues"
  ) %>%
  layout(title = "Países que usan energía de combustibles fósiles")
fig

## Warning: Ignoring 21 observations

## **Acceso a la electricidad en Colombia a través del (2000-2020)**
# Acceso a la electricidad en promedio global
electricidad_global <- energia_sus %>%
  group_by(Year) %>%
  summarise(`Access to electricity (% of population)` = mean(`Access to electricity (% of population)`, na.rm = TRUE))

fig <- plot_ly(electricidad_global, x = ~Year, y = ~`Access to electricity (% of population)`, type = 'scatter', mode = 'lines') %>%
  layout(title = 'Acceso a la electricidad (2000-2020)', xaxis = list(title = 'Año'), yaxis = list(title = 'Acceso a la electricidad (%)'))
fig

# Acceso a la electricidad en Colombia
energia_colombia <- filter(energia_sus, Country == "Colombia")

electricidad_colombia <- energia_colombia %>%
  group_by(Year) %>%
  summarise(`Access to electricity (% of population)` = mean(`Access to electricity (% of population)`, na.rm = TRUE))

fig <- plot_ly(electricidad_colombia, x = ~Year, y = ~`Access to electricity (% of population)`, type = 'scatter', mode = 'lines') %>%
  layout(title = 'Acceso a la electricidad en Colombia (2000-2020)', xaxis = list(title = 'Año'), yaxis = list(title = 'Acceso a la electricidad (%)'))
fig

## **Países con mayores emisiones de CO2**
# Agrupar y obtener los máximos de emisiones de CO2
maxco2 <- energia_sus %>%
  group_by(Country) %>%
  summarise(CO2 = max(CO2, na.rm = TRUE)) %>%
  arrange(desc(CO2))

## Warning: There were 13 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `CO2 = max(CO2, na.rm = TRUE)`.
## ℹ In group 12: `Country = "Bahamas"`.
## Caused by warning in `max()`:
## ! ningun argumento finito para max; retornando -Inf
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 12 remaining warnings.

top10co2 <- head(maxco2, 10)

# Crear el gráfico de barras
fig1 <- plot_ly(
  top10co2,
  x = ~Country,
  y = ~CO2,
  type = 'bar',
  color = ~CO2,
  title = 'Países con mayores emisiones de CO2',
  labels = list(x = 'Países', y = 'Emisiones CO2 (kt)')
)

fig1 <- fig1 %>% layout(height = 800)

## Warning: Specifying width/height in layout() is now deprecated.
## Please specify in ggplotly() or plot_ly()

fig1

## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

## Warning: 'bar' objects don't have these attributes: 'title', 'labels'
## Valid attributes include:
## '_deprecated', 'alignmentgroup', 'base', 'basesrc', 'cliponaxis', 'constraintext', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'insidetextanchor', 'insidetextfont', 'legendgroup', 'legendgrouptitle', 'legendrank', 'marker', 'meta', 'metasrc', 'name', 'offset', 'offsetgroup', 'offsetsrc', 'opacity', 'orientation', 'outsidetextfont', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textangle', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'width', 'widthsrc', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## **Mapa geo-referenciado del porcentaje de población por países con mayores emisiones de CO2**
df_co2 <- energia_sus %>%
  group_by(Country) %>%
  summarise(CO2 = sum(CO2, na.rm = TRUE)) %>%
  arrange(desc(CO2)) %>%
  head(13)

fig2 <- plot_ly(
  df_co2,
  locations = ~Country,
  locationmode = 'country names',
  z = ~CO2,
  type = 'choropleth',
  color = ~CO2,
  colorbar = list(title = 'Emisiones CO2 (kt)'),
  title = 'Países con mayores emisiones de CO2 (2000-2020)'
)

fig2

## Warning: 'choropleth' objects don't have these attributes: 'title'
## Valid attributes include:
## 'autocolorscale', 'coloraxis', 'colorbar', 'colorscale', 'customdata', 'customdatasrc', 'featureidkey', 'geo', 'geojson', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'locationmode', 'locations', 'locationssrc', 'marker', 'meta', 'metasrc', 'name', 'reversescale', 'selected', 'selectedpoints', 'showlegend', 'showscale', 'stream', 'text', 'textsrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'z', 'zauto', 'zmax', 'zmid', 'zmin', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## **Los principales países por consumo de electricidad a partir de combustibles fósiles (2000-2020)**
consumo_fossil_fuel <- energia_sus %>%
  group_by(Country) %>%
  summarise(Electricity_from_fossil_fuels = sum(`Electricity from fossil fuels (TWh)`, na.rm = TRUE)) %>%
  arrange(desc(Electricity_from_fossil_fuels)) %>%
  head(10)

fig3 <- plot_ly(
  consumo_fossil_fuel,
  x = ~Country,
  y = ~Electricity_from_fossil_fuels,
  type = 'bar',
  title = 'Los principales países por consumo de electricidad a partir de combustibles fósiles (2000-2020)'
)

fig3

## Warning: 'bar' objects don't have these attributes: 'title'
## Valid attributes include:
## '_deprecated', 'alignmentgroup', 'base', 'basesrc', 'cliponaxis', 'constraintext', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'insidetextanchor', 'insidetextfont', 'legendgroup', 'legendgrouptitle', 'legendrank', 'marker', 'meta', 'metasrc', 'name', 'offset', 'offsetgroup', 'offsetsrc', 'opacity', 'orientation', 'outsidetextfont', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textangle', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'width', 'widthsrc', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## **Histograma del consumo de electricidad a partir de los combustibles fósiles (2000-2020)**
# Filtrar datos para Colombia
energia_colombia <- energia_sus %>% filter(Country == 'Colombia')

# Agrupar por año y calcular el consumo total de electricidad a partir de combustibles fósiles
fosil_colombia <- energia_colombia %>%
  group_by(Year) %>%
  summarise(Electricity_from_fossil_fuels = sum(`Electricity from fossil fuels (TWh)`, na.rm = TRUE))

fig4 <- plot_ly(
  fosil_colombia,
  x = ~Year,
  y = ~Electricity_from_fossil_fuels,
  type = 'bar',
  title = 'Consumo de electricidad a partir de combustibles fósiles en Colombia (2000-2020)',
  labels = list(x = 'Año', y = 'Electricidad de combustibles fósiles (TWh)')
)

fig4

## Warning: 'bar' objects don't have these attributes: 'title', 'labels'
## Valid attributes include:
## '_deprecated', 'alignmentgroup', 'base', 'basesrc', 'cliponaxis', 'constraintext', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'insidetextanchor', 'insidetextfont', 'legendgroup', 'legendgrouptitle', 'legendrank', 'marker', 'meta', 'metasrc', 'name', 'offset', 'offsetgroup', 'offsetsrc', 'opacity', 'orientation', 'outsidetextfont', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textangle', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'width', 'widthsrc', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## **Tasa de consumo de combustibles fósiles a lo largo de los años para los 10 principales países**
top_fossil_fuel_countries <- energia_sus %>%
  group_by(Country) %>%
  summarise(mean_fossil_fuels = mean(`Electricity from fossil fuels (TWh)`, na.rm = TRUE)) %>%
  arrange(desc(mean_fossil_fuels))

top_fossil_fuel_countries <- head(top_fossil_fuel_countries, 10)


# Filtrar datos para los países seleccionados
filtered_data <- energia_sus %>%
  filter(Country %in% top_fossil_fuel_countries$Country)

fig5 <- plot_ly(
  filtered_data,
  x = ~Year,
  y = ~`Electricity from fossil fuels (TWh)`,
  color = ~Country,
  type = 'scatter',
  mode = 'lines',
  title = 'Tendencias en la Generación de Electricidad de Combustibles Fósiles entre los Principales 10 Países'
)

fig5

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'cliponaxis', 'connectgaps', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'fill', 'fillcolor', 'fillpattern', 'groupnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'orientation', 'selected', 'selectedpoints', 'showlegend', 'stackgaps', 'stackgroup', 'stream', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'cliponaxis', 'connectgaps', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'fill', 'fillcolor', 'fillpattern', 'groupnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'orientation', 'selected', 'selectedpoints', 'showlegend', 'stackgaps', 'stackgroup', 'stream', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'cliponaxis', 'connectgaps', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'fill', 'fillcolor', 'fillpattern', 'groupnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'orientation', 'selected', 'selectedpoints', 'showlegend', 'stackgaps', 'stackgroup', 'stream', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'cliponaxis', 'connectgaps', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'fill', 'fillcolor', 'fillpattern', 'groupnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'orientation', 'selected', 'selectedpoints', 'showlegend', 'stackgaps', 'stackgroup', 'stream', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'cliponaxis', 'connectgaps', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'fill', 'fillcolor', 'fillpattern', 'groupnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'orientation', 'selected', 'selectedpoints', 'showlegend', 'stackgaps', 'stackgroup', 'stream', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'cliponaxis', 'connectgaps', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'fill', 'fillcolor', 'fillpattern', 'groupnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'orientation', 'selected', 'selectedpoints', 'showlegend', 'stackgaps', 'stackgroup', 'stream', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'cliponaxis', 'connectgaps', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'fill', 'fillcolor', 'fillpattern', 'groupnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'orientation', 'selected', 'selectedpoints', 'showlegend', 'stackgaps', 'stackgroup', 'stream', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'cliponaxis', 'connectgaps', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'fill', 'fillcolor', 'fillpattern', 'groupnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'orientation', 'selected', 'selectedpoints', 'showlegend', 'stackgaps', 'stackgroup', 'stream', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'cliponaxis', 'connectgaps', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'fill', 'fillcolor', 'fillpattern', 'groupnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'orientation', 'selected', 'selectedpoints', 'showlegend', 'stackgaps', 'stackgroup', 'stream', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'cliponaxis', 'connectgaps', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'fill', 'fillcolor', 'fillpattern', 'groupnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'orientation', 'selected', 'selectedpoints', 'showlegend', 'stackgaps', 'stackgroup', 'stream', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## **Tendencias en la generación de electricidad de combustibles fósiles entre los 10 principales países**
# Asegurar que Colombia esté en la lista de países seleccionados
# Asegurar que Colombia esté en la lista de países seleccionados
top_fossil_fuel_countries <- energia_sus %>%
  group_by(Country) %>%
  summarise(mean_fossil_fuels = mean(`Electricity from fossil fuels (TWh)`, na.rm = TRUE)) %>%
  arrange(desc(mean_fossil_fuels)) %>%
  slice(1:3)  # Selecciona los 3 principales países

# Filtrar los datos para los países seleccionados (top 3 + Colombia)
filtered_data <- energia_sus %>%
  filter(Country %in% top_fossil_fuel_countries$Country)

# Crear el gráfico de líneas para mostrar la tendencia en el uso de electricidad de combustibles fósiles
fig <- plot_ly(
  data = filtered_data,
  x = ~Year,
  y = ~`Electricity from fossil fuels (TWh)`,
  color = ~Country,
  type = 'scatter',
  mode = 'lines+markers'
) %>%
  layout(
    title = 'Tendencias en la Generación de Electricidad de Combustibles Fósiles entre los Principales 10 Países y Colombia',
    xaxis = list(title = 'Año'),
    yaxis = list(title = 'Electricidad de Combustibles Fósiles (TWh)')
  )

fig

# Definir los países que se quieren visualizar
paises <- c('United States', 'Colombia', 'China', 'Brazil')

# Filtrar los datos para los países seleccionados
tenden_co2 <- energia_sus %>% 
  filter(Country %in% paises)

# Crear el gráfico de líneas para mostrar la tendencia de emisiones de CO2
ggplot(tenden_co2, aes(x = Year, y = CO2, color = Country)) +
  geom_line() +
  ggtitle('Tendencia de Emisiones de CO2 para Estados Unidos, Colombia, China y Brasil') +
  theme_minimal()

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

# **Tendencia de emisiones de CO2 para Colombia y Brasil**
# Definir los países que se quieren visualizar
paises <- c('Colombia', 'Brazil', 'Argentina', 'Chile', 'Ecuador', 'Uruguay')

# Filtrar los datos para los países seleccionados
tenden_co2 <- energia_sus %>% 
  filter(Country %in% paises)

# Crear el gráfico de líneas para mostrar la tendencia de emisiones de CO2
ggplot(tenden_co2, aes(x = Year, y = CO2, color = Country)) +
  geom_line() +
  ggtitle('Tendencia de Emisiones de CO2 para Colombia y Brasil') +
  theme_minimal()

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_line()`).

# **Eliminación de los NAN*
# Especificar las columnas a eliminar
columns_to_drop <- c("Financial.flows.to.developing.countries..US....", 
                     "Renewables....equivalent.primary.energy.", 
                     "CO2", 
                     "gdp_growth",
                     "Renewable.electricity.generating.capacity.per.capita")

# Eliminar las columnas
data_clean <- energia_sus[, !(names(energia_sus) %in% columns_to_drop)]

data_clean <- as.data.frame(data_clean)

# **Imputación**
# Visualización del mapa de valores faltantes
library(ggplot2)
library(mice)

## 
## Adjuntando el paquete: 'mice'

## The following object is masked from 'package:stats':
## 
##     filter

## The following objects are masked from 'package:base':
## 
##     cbind, rbind

# Mapa de valores faltantes
missing_data <- data_clean %>% 
  summarise(across(everything(), ~ sum(is.na(.)))) %>%
  pivot_longer(cols = everything(), names_to = "variable", values_to = "missing_count")

ggplot(missing_data, aes(x = variable, y = missing_count)) +
  geom_bar(stat = 'identity') +
  coord_flip() +
  ggtitle('Mapa de valores faltantes en el DataFrame data_clean') +
  theme_minimal()

# **Gráfico de la distribución para cada una de las variables**
library(ggplot2)

# Lista de variables a graficar
variables <- c(
  'Access to electricity (% of population)', 
  'Access to clean fuels for cooking',  
  'Renewable energy share in the total final energy consumption (%)', 
  'Electricity from fossil fuels (TWh)', 
  'Electricity from nuclear (TWh)', 
  'Electricity from renewables (TWh)', 
  'Low-carbon electricity (% electricity)', 
  'Primary energy consumption per capita (kWh/person)', 
  'Energy intensity level of primary energy (MJ/$2017 PPP GDP)',  
  'gdp_per_capita'
)

# Crear un histograma para cada variable en la lista
# Crear un histograma para cada variable en la lista
library(rlang)  # Cargar rlang para usar `ensym`

## 
## Adjuntando el paquete: 'rlang'

## The following object is masked from 'package:data.table':
## 
##     :=

for (column in variables) {
  column_sym <- ensym(column)  # Convertir el nombre de la columna en símbolo
  ggplot(data_clean, aes(x = !!column_sym)) +  # Usar !! para desreferenciar el símbolo
    geom_histogram(binwidth = 30, fill = 'blue', alpha = 0.7, color = 'black') +
    labs(title = paste('Distribución de', column), x = column, y = 'Frecuencia') +
    theme_minimal() +
    theme(plot.title = element_text(hjust = 0.5)) +
    geom_density(color = "red", size = 1) +
    ggtitle(paste("Distribución de", column))
}

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Cargar las bibliotecas necesarias
library(dplyr)
library(tidyr)
library(VIM)

## Cargando paquete requerido: colorspace

## Cargando paquete requerido: grid

## VIM is ready to use.

## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues

## 
## Adjuntando el paquete: 'VIM'

## The following object is masked from 'package:datasets':
## 
##     sleep

library(ggplot2)
library(Amelia)

## Cargando paquete requerido: Rcpp

## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.8.2, built: 2024-04-10)
## ## Copyright (C) 2005-2024 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##

# Suponiendo que 'energia_sus' es tu data frame original con valores faltantes
# Separar columnas numéricas y categóricas
numeric_cols <- select_if(energia_sus, is.numeric)
categorical_cols <- select_if(energia_sus, Negate(is.numeric))

# Imputar valores faltantes en datos numéricos con la media
numeric_imputed <- numeric_cols %>%
  mutate(across(everything(), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))

# Imputar valores faltantes en datos categóricos con el valor más frecuente
categorical_imputed <- categorical_cols %>%
  mutate(across(everything(), ~ ifelse(is.na(.), names(sort(table(.), decreasing = TRUE)[1]), .)))

# Combinar datos numéricos y categóricos imputados
data_imputed <- bind_cols(numeric_imputed, categorical_imputed)

# Imputar todo el data frame con la estrategia de 'valor más frecuente'
imputer_most_frequent <- function(column) {
  if (is.numeric(column)) {
    column[is.na(column)] <- mean(column, na.rm = TRUE)
  } else {
    column[is.na(column)] <- names(sort(table(column), decreasing = TRUE)[1])
  }
  return(column)
}

data_imputed <- energia_sus %>%
  mutate(across(everything(), imputer_most_frequent))

# Crear un gráfico del mapa de valores faltantes
missmap(data_imputed, main = "Mapa de valores faltantes en el DataFrame data_imputed", col = c("red", "blue"), legend = FALSE)

# Establecer el estilo de los gráficos y mostrar un gráfico personalizado con ggplot2
theme_set(theme_minimal())

# Utilizar pivot_longer() en lugar de melt()
na_data <- as.data.frame(is.na(data_imputed)) %>%
  mutate(row_id = row_number()) %>%
  pivot_longer(cols = -row_id, names_to = "Var2", values_to = "value")

# Crear el gráfico del mapa de valores faltantes
ggplot(na_data, aes(x = Var2, y = row_id, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_manual(values = c("TRUE" = "red", "FALSE" = "blue"), name = "Faltante") +
  labs(title = "Mapa de valores faltantes en el DataFrame data_imputed",
       x = "Variables",
       y = "Observaciones") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        plot.title = element_text(hjust = 0.5))

# **Modelo ARIMA**
# **Gráfico de electricidad renovable**

# Filtrar los datos para que solo incluyan Brasil, Colombia y Alemania
data_filtered <- data_imputed[data_imputed$Country %in% c("Brazil", "Colombia", "Germany"), ]

# Cargar el paquete plotly
library(plotly)

# Crear el gráfico interactivo con los nombres de los países seleccionados
fig <- plot_ly(data_filtered, 
               x = ~Year, 
               y = ~`Electricity from renewables (TWh)`, 
               color = ~Country,  # Agrupar por país
               type = 'scatter', 
               mode = 'lines') %>%
  layout(title = 'Electricity from Renewables (TWh) en Brasil, Colombia y Alemania',
         xaxis = list(title = 'Año'), 
         yaxis = list(title = 'Electricidad de Renovables (TWh)'))

# Mostrar el gráfico interactivo
fig

# **Autocorrelación de Electricidad renovable**
# Calcular la autocorrelación sobre la variable 'Electricity from renewables (TWh)'
autocorr_values <- acf(data_filtered$`Electricity from renewables (TWh)`, plot = FALSE)$acf

# Crear el gráfico interactivo de autocorrelación
fig_acf <- plot_ly(x = ~seq_along(autocorr_values), y = ~autocorr_values, 
                   type = 'scatter', mode = 'markers', 
                   marker = list(size = 8, color = 'blue'), 
                   name = 'Autocorrelación - Puntos') %>%
  add_lines(x = ~seq_along(autocorr_values), 
            y = ~autocorr_values, 
            line = list(color = 'blue')) %>%
  layout(title = 'Autocorrelación - Electricity from Renewables (TWh)',
         xaxis = list(title = 'Lag'),
         yaxis = list(title = 'Autocorrelación'))
fig_acf

## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...

# **Autocorrelación Parcial**
# Calcular la autocorrelación parcial sobre la variable 'Electricity from renewables (TWh)'
partial_autocorr_values <- pacf(data_filtered$`Electricity from renewables (TWh)`, plot = FALSE)$acf

# Crear el gráfico interactivo de autocorrelación parcial
fig_pacf <- plot_ly(x = ~seq_along(partial_autocorr_values), y = ~partial_autocorr_values, 
                    type = 'scatter', mode = 'markers', 
                    marker = list(size = 8, color = 'blue'), 
                    name = 'Autocorrelación Parcial - Puntos') %>%
  add_lines(x = ~seq_along(partial_autocorr_values), 
            y = ~partial_autocorr_values, 
            line = list(color = 'blue')) %>%
  layout(title = 'Autocorrelación Parcial - Electricity from Renewables (TWh)',
         xaxis = list(title = 'Lag'),
         yaxis = list(title = 'Autocorrelación Parcial'))
fig_pacf

## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...

#install.packages("tseries")

# **Test de Dickey-Fuller**
# Realizar la prueba de Dickey-Fuller Aumentada (ADF)
# Cargar el paquete tseries
library(tseries)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

# Realizar la prueba de Dickey-Fuller Aumentada (ADF) sobre la variable 'Electricity from renewables (TWh)'
adf_result <- adf.test(data_filtered$`Electricity from renewables (TWh)`)

# Imprimir los resultados del test ADF
print(adf_result)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  data_filtered$`Electricity from renewables (TWh)`
## Dickey-Fuller = -1.3794, Lag order = 3, p-value = 0.8262
## alternative hypothesis: stationary

# **Descomposición de la serie**
# Descomposición de la serie
decompose_model <- stl(ts(data_filtered$`Electricity from renewables (TWh)`, frequency = 20), s.window = "periodic")

# Crear un gráfico para la serie de tiempo original
fig1 <- plot_ly(data = data_filtered, x = ~Year, y = ~`Electricity from renewables (TWh)`, 
                 type = 'scatter', mode = 'lines', 
                 line = list(color = 'blue', width = 2), 
                 name = 'Electricidad de Renovables (TWh)') %>%
  layout(title = 'Serie de tiempo - Electricity from Renewables (TWh)',
         xaxis = list(title = 'Año'), 
         yaxis = list(title = 'Electricidad de Renovables (TWh)'))
fig1

# Crear un gráfico para el componente tendencial
fig2 <- plot_ly(x = ~data_filtered$Year, 
                 y = ~decompose_model$time.series[, "trend"], 
                 type = 'scatter', mode = 'lines', 
                 line = list(color = 'red', width = 2), 
                 name = 'Componente tendencial') %>%
  layout(title = 'Componente tendencial',
         xaxis = list(title = 'Año'), 
         yaxis = list(title = 'Valores'))
fig2

# Crear un gráfico para el componente estacional
fig3 <- plot_ly(x = ~data_filtered$Year, 
                 y = ~decompose_model$time.series[, "seasonal"], 
                 type = 'scatter', mode = 'lines', 
                 line = list(color = 'green', width = 2), 
                 name = 'Componente estacional') %>%
  layout(title = 'Componente estacional',
         xaxis = list(title = 'Año'), 
         yaxis = list(title = 'Valores'))
fig3

# Crear un gráfico para las variaciones irregulares (ruido)
fig4 <- plot_ly(x = ~data_filtered$Year, 
                 y = ~decompose_model$time.series[, "remainder"], 
                 type = 'scatter', mode = 'lines', 
                 line = list(color = 'black', width = 2), 
                 name = 'Variaciones irregulares (Ruido)') %>%
  layout(title = 'Variaciones irregulares (Ruido)',
         xaxis = list(title = 'Año'), 
         yaxis = list(title = 'Valores'))
fig4

## **P-Value**
# Aplicar el test ADF solo a los valores finitos (no NA) de los residuos
resid_non_nan <- decompose_model$resid[is.finite(decompose_model$resid)]

# Imprimir el p-valor
cat('p-valor:', adf_result$p.value, "\n")

## p-valor: 0.8262307

## **Prueba de normalidad.**
# Evaluar la normalidad del conjunto de datos residuales
library(stats)  # Para la prueba Shapiro-Wilk

resid_clean <- decompose_model$resid
resid_clean[is.infinite(resid_clean)] <- NA  # Reemplazar infinitos por NA
resid_clean <- na.omit(resid_clean)          # Eliminar los NA

## **Residuos**
# Crear el gráfico Q-Q interactivo con Plotly
# Asegúrate de que residuals es un vector numérico
residuals <- as.numeric(decompose_model$resid)  # Cambia esto si aún no lo has hecho

# Eliminar NA e infinitos
residuals <- residuals[is.finite(residuals)]  # Eliminar NAs y valores infinitos

# Verificar la longitud después de la limpieza
cat("Número de residuos después de la limpieza:", length(residuals), "\n")

## Número de residuos después de la limpieza: 0

## **Modelo ARIMA**
## **Diferenciación**
nlag <- 30  # Definir el número de lags

# Crear el gráfico para la serie original
fig1 <- plot_ly(data_filtered, x = ~Year, y = ~`Electricity from renewables (TWh)`, 
                 type = 'scatter', mode = 'lines', 
                 line = list(width = 1, color = 'blue')) %>%
  layout(title = 'Serie Original',
         xaxis = list(title = 'Fecha'),
         yaxis = list(title = 'Precio Ajustado'))

# Mostrar el gráfico interactivo de la serie original
fig1

# Gráfico de ACF para la serie original
acf_values <- acf(data_filtered$`Electricity from renewables (TWh)`, plot = FALSE, lag.max = nlag)
fig2 <- plot_ly()

# Añadir los puntos de ACF conectados al eje X
for (i in seq_along(acf_values$acf)) {
  fig2 <- fig2 %>%
    add_trace(x = c(i-1, i-1), y = c(0, acf_values$acf[i]), mode = 'lines', line = list(color = 'blue', width = 0.5)) %>%
    add_trace(x = i-1, y = acf_values$acf[i], mode = 'markers', marker = list(color = 'blue', size = 8))
}

fig2 <- fig2 %>%
  layout(title = 'Autocorrelación (ACF)',
         xaxis = list(title = 'Lags'),
         yaxis = list(title = 'ACF'))

# Mostrar el gráfico interactivo de ACF
fig2

## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter

# Gráfico de PACF para la serie original
pacf_values <- pacf(data_filtered$`Electricity from renewables (TWh)`, plot = FALSE, lag.max = nlag)
fig3 <- plot_ly()

# Añadir los puntos de PACF conectados al eje X
for (i in seq_along(pacf_values$acf)) {
  fig3 <- fig3 %>%
    add_trace(x = c(i-1, i-1), y = c(0, pacf_values$acf[i]), mode = 'lines', line = list(color = 'blue', width = 0.5)) %>%
    add_trace(x = i-1, y = pacf_values$acf[i], mode = 'markers', marker = list(color = 'blue', size = 8))
}

fig3 <- fig3 %>%
  layout(title = 'Autocorrelación Parcial (PACF)',
         xaxis = list(title = 'Lags'),
         yaxis = list(title = 'PACF'))

# Mostrar el gráfico interactivo de PACF
fig3

## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter

# Crear el gráfico para el diferenciador de primer orden
differenced_data <- diff(data_filtered$`Electricity from renewables (TWh)`)
differenced_years <- data_filtered$Year[-1]  # Ajustar el año para el gráfico

fig4 <- plot_ly(x = differenced_years, y = differenced_data, 
                 type = 'scatter', mode = 'lines', 
                 line = list(width = 1, color = 'green')) %>%
  layout(title = 'Diferenciador de Primer Orden',
         xaxis = list(title = 'Fecha'),
         yaxis = list(title = 'Diferencia'))

# Mostrar el gráfico interactivo del diferenciador de primer orden
fig4

# Gráfico de ACF para el diferenciador de primer orden
# Calcular el diferenciador de primer orden
differenced_values <- data_filtered$`Electricity from renewables (TWh)` %>% 
                       diff() %>% 
                       na.omit()  # Eliminar el NA resultante

# Asegúrate de que el eje x tenga la misma longitud
years <- data_filtered$Year[-1]  # Eliminar el primer año correspondiente al NA en el diferenciador

# Crear el gráfico para el diferenciador de primer orden
fig4 <- plot_ly() %>%
    add_trace(x = years, 
              y = differenced_values, 
              type = 'scatter', 
              mode = 'lines', 
              line = list(width = 1, color = 'green')) %>%
    layout(title = 'Diferenciador de Primer Orden',
           xaxis = list(title = 'Fecha'),
           yaxis = list(title = 'Diferencia'),
           height = 400)

## Warning: Specifying width/height in layout() is now deprecated.
## Please specify in ggplotly() or plot_ly()

# Mostrar el gráfico interactivo del diferenciador de primer orden
fig4

# Gráfico de PACF para el diferenciador de primer orden
pacf_diff_values <- pacf(data_filtered$`Electricity from renewables (TWh)` %>% diff() %>% na.omit(), plot = FALSE, lag.max = nlag)
fig6 <- plot_ly()

# Añadir los puntos de PACF del diferenciador conectados al eje X
for (i in seq_along(pacf_diff_values$acf)) {
  fig6 <- fig6 %>%
    add_trace(x = c(i-1, i-1), y = c(0, pacf_diff_values$acf[i]), mode = 'lines', line = list(color = 'green', width = 0.5)) %>%
    add_trace(x = i-1, y = pacf_diff_values$acf[i], mode = 'markers', marker = list(color = 'green', size = 8))
}

fig6 <- fig6 %>%
  layout(title = 'Autocorrelación Parcial (PACF) del Diferenciador',
         xaxis = list(title = 'Lags'),
         yaxis = list(title = 'PACF'))

# Mostrar el gráfico interactivo de PACF del diferenciador
fig6

## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter

# Realizar la prueba ADF para el diferenciador
adjclose_diff <- data_filtered$`Electricity from renewables (TWh)` %>% diff() %>% na.omit()
adf_result2 <- adf.test(adjclose_diff, alternative = "stationary", k = 0) # Ajusta el parámetro k si es necesario

## Warning in adf.test(adjclose_diff, alternative = "stationary", k = 0): p-value
## smaller than printed p-value

# Imprimir resultados de la prueba ADF
cat('ADF Statistic:', adf_result2$statistic, "\n")

## ADF Statistic: -7.621159

cat('p-value:', adf_result2$p.value, "\n")

## p-value: 0.01

# Cargar librerías necesarias
library(forecast)  # Para el modelo ARIMA
library(dplyr)     # Para manipulación de datos
library(tidyr)     # Para manipulación de datos
library(ggplot2)   # Para visualización

## **Criterios AIC, BIC y HQIC**
# Número total de observaciones en el conjunto de datos filtrado
n_data <- nrow(data_filtered)

# Definir el tamaño del conjunto de prueba (por ejemplo, 30 observaciones)
n_test <- 30

# Calcular el tamaño del conjunto de entrenamiento
train_size <- n_data - n_test

# Dividir los datos en conjuntos de entrenamiento y prueba
train <- data_filtered$`Electricity from renewables (TWh)`[1:train_size]
dates_train <- data_filtered$Year[1:train_size]

test <- data_filtered$`Electricity from renewables (TWh)`[(train_size + 1):(train_size + n_test)]
dates_test <- data_filtered$Year[(train_size + 1):(train_size + n_test)]

# Mostrar las formas de los conjuntos de entrenamiento y prueba
cat("Train:", length(train), "\n")

## Train: 33

cat("Test:", length(test), "\n")

## Test: 30

# Obtener los conjuntos de prueba de diferentes tamaños
test7 <- data_filtered$`Electricity from renewables (TWh)`[(train_size + 1):(train_size + 7)]
dates_test7 <- data_filtered$Year[(train_size + 1):(train_size + 7)]
test14 <- data_filtered$`Electricity from renewables (TWh)`[(train_size + 1):(train_size + 14)]
dates_test14 <- data_filtered$Year[(train_size + 1):(train_size + 14)]
test21 <- data_filtered$`Electricity from renewables (TWh)`[(train_size + 1):(train_size + 21)]
dates_test21 <- data_filtered$Year[(train_size + 1):(train_size + 21)]
test28 <- data_filtered$`Electricity from renewables (TWh)`[(train_size + 1):(train_size + 28)]
dates_test28 <- data_filtered$Year[(train_size + 1):(train_size + 28)]

# Crear DataFrames para los conjuntos de entrenamiento y prueba
train_df <- data_filtered[1:train_size, c("Electricity from renewables (TWh)")]
test_df <- data_filtered[(train_size + 1):(train_size + n_test), c("Electricity from renewables (TWh)")]

# Función para encontrar el mejor modelo ARIMA basado en AIC, BIC y HQIC
best_model <- function(train) {
  best_aic <- Inf
  best_bic <- Inf
  best_hqic <- Inf
  
  best_order_aic <- NULL
  best_order_bic <- NULL
  best_order_hqic <- NULL
  
  best_mdl_aic <- NULL
  best_mdl_bic <- NULL
  best_mdl_hqic <- NULL
  
  pq_rng <- 0:4
  d_rng <- 0:2
  
  # Iterar sobre todos los posibles valores de p, d y q
  for (p in pq_rng) {
    for (d in d_rng) {
      for (q in pq_rng) {
        # Ajustar el modelo ARIMA
        tmp_mdl <- tryCatch(
          Arima(train, order = c(p, d, q)),
          error = function(e) NULL
        )
        
        if (!is.null(tmp_mdl)) {
          # Obtener los valores de AIC, BIC y HQIC
          tmp_aic <- AIC(tmp_mdl)
          tmp_bic <- BIC(tmp_mdl)
          
          # Calcular HQIC manualmente
          n <- length(train)  # Número de observaciones
          tmp_hqic <- tmp_aic + 2 * (log(log(n)))  # HQIC: AIC + 2 * log(log(n))
          
          # Comparar el AIC
          if (tmp_aic < best_aic) {
            best_aic <- tmp_aic
            best_order_aic <- c(p, d, q)
            best_mdl_aic <- tmp_mdl
          }
          
          # Comparar el BIC
          if (tmp_bic < best_bic) {
            best_bic <- tmp_bic
            best_order_bic <- c(p, d, q)
            best_mdl_bic <- tmp_mdl
          }
          
          # Comparar el HQIC
          if (tmp_hqic < best_hqic) {
            best_hqic <- tmp_hqic
            best_order_hqic <- c(p, d, q)
            best_mdl_hqic <- tmp_mdl
          }
        }
      }
    }
  }
  
  return(list(
    AIC = list(best_aic = best_aic, best_order = best_order_aic, best_model = best_mdl_aic),
    BIC = list(best_bic = best_bic, best_order = best_order_bic, best_model = best_mdl_bic),
    HQIC = list(best_hqic = best_hqic, best_order = best_order_hqic, best_model = best_mdl_hqic)
  ))
}

# Usar la función con tus datos de entrenamiento
results <- best_model(train_df$`Electricity from renewables (TWh)`)

# Mostrar resultados del AIC
cat("Mejor modelo basado en AIC:", results$AIC$best_aic, "\n")

## Mejor modelo basado en AIC: 373.612

cat("Mejor orden para AIC:", sprintf("(p=%d, d=%d, q=%d)", 
                                      results$AIC$best_order[1], 
                                      results$AIC$best_order[2], 
                                      results$AIC$best_order[3]), "\n")

## Mejor orden para AIC: (p=0, d=2, q=1)

# Mostrar resultados del BIC
cat("Mejor modelo basado en BIC:", results$BIC$best_bic, "\n")

## Mejor modelo basado en BIC: 376.4799

cat("Mejor orden para BIC:", sprintf("(p=%d, d=%d, q=%d)", 
                                      results$BIC$best_order[1], 
                                      results$BIC$best_order[2], 
                                      results$BIC$best_order[3]), "\n")

## Mejor orden para BIC: (p=0, d=2, q=1)

# Mostrar resultados del HQIC
cat("Mejor modelo basado en HQIC:", results$HQIC$best_hqic, "\n")

## Mejor modelo basado en HQIC: 376.1155

cat("Mejor orden para HQIC:", sprintf("(p=%d, d=%d, q=%d)", 
                                       results$HQIC$best_order[1], 
                                       results$HQIC$best_order[2], 
                                       results$HQIC$best_order[3]), "\n")

## Mejor orden para HQIC: (p=0, d=2, q=1)

library(forecast)
library(plotly)

# Función para el modelo ARIMA con pronóstico rodante
arima_rolling <- function(train_list, test, best_order_aic) {
  # Inicializar un historial con los datos de entrenamiento
  history <- unlist(train_list)
  predictions <- numeric()  # Inicializar un vector para las predicciones
  
  # Asegúrate de que 'test' sea un vector
  if (is.data.frame(test)) {
    test <- as.numeric(test[,1])  # Convertir a vector numérico si es un data frame
  }
  
  # Iterar sobre cada punto de tiempo en el conjunto de prueba
  for (t in seq_along(test)) {
    # Ajustar modelo ARIMA
    tryCatch({
      model <- Arima(history, order = best_order_aic)
      model_fit <- fit(model)  # Ajustar el modelo
    }, error = function(e) {
      cat(sprintf("Error al ajustar el modelo en el paso %d: %s\n", t, e$message))
      break
    })
    
    # Realizar la predicción
    yhat <- forecast(model_fit, h = 1)$mean
    predictions <- c(predictions, yhat)  # Almacenar la predicción

    # Obtener el valor real observado en el conjunto de prueba
    obs <- test[t]
    
    # Añadir la observación actual al conjunto de datos históricos
    if (is.numeric(obs)) {
      history <- c(history, obs)
    } else {
      cat(sprintf("Valor inesperado en test en el paso %d: %s\n", t, obs))
    }
  }
  
  return(predictions)
}

# Cargar las librerías necesarias
library(forecast)
library(ggplot2)
library(dplyr)
library(lmtest)

## Cargando paquete requerido: zoo

## 
## Adjuntando el paquete: 'zoo'

## The following objects are masked from 'package:data.table':
## 
##     yearmon, yearqtr

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

# Suponiendo que 'train', 'test7', 'test14', 'test21', 'test28' son tus conjuntos de datos
# Asegúrate de que estos datos estén correctamente definidos antes de ajustar el modelo

# Ajuste del modelo ARIMA automáticamente
model_fit_aic <- auto.arima(train)

# Función para el modelo ARIMA sin rolling forecast
arima_sin_rolling <- function(test, modelo) {
  # Calcular el número de pasos de predicción basados en el tamaño del conjunto de prueba
  forecast_steps <- length(test)
  
  # Realizar las predicciones para el número de pasos determinado
  forecast_tipo <- forecast(modelo, h = forecast_steps)
  
  # Obtener los valores predichos
  forecast_values <- as.numeric(forecast_tipo$mean)
  
  # Imprimir los valores observados y predichos
  for (i in seq_along(test)) {
    observado <- test[i]  # Valores observados
    predicho <- forecast_values[i]  # Acceder a los valores predichos por índice
    cat(sprintf("Predicho: %.2f, Observado: %.2f\n", predicho, observado))
  }
  
  # Devolver las predicciones
  return(forecast_values)
}

# Llamadas a la función para diferentes horizontes de predicción
cat('ARIMA sin Rolling AIC - Horizonte de 7 días.\n')

## ARIMA sin Rolling AIC - Horizonte de 7 días.

yhat7_sin_aic <- arima_sin_rolling(test7, model_fit_aic)

## Predicho: 48.81, Observado: 47.58
## Predicho: 48.81, Observado: 50.48
## Predicho: 48.81, Observado: 48.06
## Predicho: 48.81, Observado: 49.00
## Predicho: 48.81, Observado: 49.54
## Predicho: 48.81, Observado: 61.39
## Predicho: 48.81, Observado: 60.08

cat('\nARIMA sin Rolling AIC - Horizonte de 14 días.\n')

## 
## ARIMA sin Rolling AIC - Horizonte de 14 días.

yhat14_sin_aic <- arima_sin_rolling(test14, model_fit_aic)

## Predicho: 48.81, Observado: 47.58
## Predicho: 48.81, Observado: 50.48
## Predicho: 48.81, Observado: 48.06
## Predicho: 48.81, Observado: 49.00
## Predicho: 48.81, Observado: 49.54
## Predicho: 48.81, Observado: 61.39
## Predicho: 48.81, Observado: 60.08
## Predicho: 48.81, Observado: 54.31
## Predicho: 48.81, Observado: 49.97
## Predicho: 48.81, Observado: 35.47
## Predicho: 48.81, Observado: 37.90
## Predicho: 48.81, Observado: 44.48
## Predicho: 48.81, Observado: 46.67
## Predicho: 48.81, Observado: 57.97

cat('\nARIMA sin Rolling AIC - Horizonte de 21 días.\n')

## 
## ARIMA sin Rolling AIC - Horizonte de 21 días.

yhat21_sin_aic <- arima_sin_rolling(test21, model_fit_aic)

## Predicho: 48.81, Observado: 47.58
## Predicho: 48.81, Observado: 50.48
## Predicho: 48.81, Observado: 48.06
## Predicho: 48.81, Observado: 49.00
## Predicho: 48.81, Observado: 49.54
## Predicho: 48.81, Observado: 61.39
## Predicho: 48.81, Observado: 60.08
## Predicho: 48.81, Observado: 54.31
## Predicho: 48.81, Observado: 49.97
## Predicho: 48.81, Observado: 35.47
## Predicho: 48.81, Observado: 37.90
## Predicho: 48.81, Observado: 44.48
## Predicho: 48.81, Observado: 46.67
## Predicho: 48.81, Observado: 57.97
## Predicho: 48.81, Observado: 63.40
## Predicho: 48.81, Observado: 72.51
## Predicho: 48.81, Observado: 89.38
## Predicho: 48.81, Observado: 94.28
## Predicho: 48.81, Observado: 95.94
## Predicho: 48.81, Observado: 105.18
## Predicho: 48.81, Observado: 124.04

cat('\nARIMA sin Rolling AIC - Horizonte de 28 días.\n')

## 
## ARIMA sin Rolling AIC - Horizonte de 28 días.

yhat28_sin_aic <- arima_sin_rolling(test28, model_fit_aic)

## Predicho: 48.81, Observado: 47.58
## Predicho: 48.81, Observado: 50.48
## Predicho: 48.81, Observado: 48.06
## Predicho: 48.81, Observado: 49.00
## Predicho: 48.81, Observado: 49.54
## Predicho: 48.81, Observado: 61.39
## Predicho: 48.81, Observado: 60.08
## Predicho: 48.81, Observado: 54.31
## Predicho: 48.81, Observado: 49.97
## Predicho: 48.81, Observado: 35.47
## Predicho: 48.81, Observado: 37.90
## Predicho: 48.81, Observado: 44.48
## Predicho: 48.81, Observado: 46.67
## Predicho: 48.81, Observado: 57.97
## Predicho: 48.81, Observado: 63.40
## Predicho: 48.81, Observado: 72.51
## Predicho: 48.81, Observado: 89.38
## Predicho: 48.81, Observado: 94.28
## Predicho: 48.81, Observado: 95.94
## Predicho: 48.81, Observado: 105.18
## Predicho: 48.81, Observado: 124.04
## Predicho: 48.81, Observado: 143.04
## Predicho: 48.81, Observado: 152.34
## Predicho: 48.81, Observado: 162.54
## Predicho: 48.81, Observado: 188.79
## Predicho: 48.81, Observado: 189.67
## Predicho: 48.81, Observado: 216.32
## Predicho: 48.81, Observado: 222.07

# Crear gráficos para cada horizonte de predicción
par(mfrow = c(2, 2), mar = c(4, 4, 2, 1))  # Ajustar la ventana de gráficos

# Gráfico para Horizonte de 7 días
plot(dates_train, train, type = "l", col = '#00008B', xlab = "Date", ylab = "Electricity", main = "7-Day Horizon")
lines(dates_test7, test7, col = '#9A32CD')
lines(dates_test7, yhat7_sin_aic, col = '#EE7600')
legend("topright", legend = c("Train", "Test", "Forecast"), col = c('#00008B', '#9A32CD', '#EE7600'), lty = 1)

# Gráfico para Horizonte de 14 días
plot(dates_train, train, type = "l", col = '#00008B', xlab = "Date", ylab = "Electricity", main = "14-Day Horizon")
lines(dates_test14, test14, col = '#9A32CD')
lines(dates_test14, yhat14_sin_aic, col = '#EE7600')
legend("topright", legend = c("Train", "Test", "Forecast"), col = c('#00008B', '#9A32CD', '#EE7600'), lty = 1)

# Gráfico para Horizonte de 21 días
plot(dates_train, train, type = "l", col = '#00008B', xlab = "Date", ylab = "Electricity", main = "21-Day Horizon")
lines(dates_test21, test21, col = '#9A32CD')
lines(dates_test21, yhat21_sin_aic, col = '#EE7600')
legend("topright", legend = c("Train", "Test", "Forecast"), col = c('#00008B', '#9A32CD', '#EE7600'), lty = 1)

# Gráfico para Horizonte de 28 días
plot(dates_train, train, type = "l", col = '#00008B', xlab = "Date", ylab = "Electricity", main = "28-Day Horizon")
lines(dates_test28, test28, col = '#9A32CD')
lines(dates_test28, yhat28_sin_aic, col = '#EE7600')
legend("topright", legend = c("Train", "Test", "Forecast"), col = c('#00008B', '#9A32CD', '#EE7600'), lty = 1)

# Análisis de residuos del modelo ARIMA basado en el criterio AIC
checkresiduals(model_fit_aic)  # Esta función de forecast package muestra varios diagnósticos

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0)
## Q* = 0.48703, df = 7, p-value = 0.9995
## 
## Model df: 0.   Total lags used: 7

# Realizar la prueba de Shapiro-Wilk para la normalidad de los residuos
shapiro_test <- shapiro.test(residuals(model_fit_aic))
cat(sprintf("Statistic = %.3f\n", shapiro_test$statistic))

## Statistic = 0.304

cat(sprintf("P-Value = %.3f\n", shapiro_test$p.value))

## P-Value = 0.000

# Interpretar el resultado de la prueba
if (shapiro_test$p.value < 0.05) {
  cat("Se rechaza la hipótesis nula, por lo tanto los residuales no siguen una distribución normal.\n")
} else {
  cat("No se rechaza la hipótesis nula, por lo tanto los residuales siguen una distribución normal.\n")
}

## Se rechaza la hipótesis nula, por lo tanto los residuales no siguen una distribución normal.

# Independencia de los residuos
# Extraer residuos del modelo ARIMA ajustado
residuos <- residuals(model_fit_aic)

# Aplicar la prueba Durbin-Watson a los residuos
dw <- dwtest(residuos ~ 1)  # El "~ 1" indica que no estamos usando ninguna variable predictora

cat(sprintf("Statistic d = %.3f\n", dw$statistic))

## Statistic d = 2.022

if (dw$statistic < 1.5 || dw$statistic > 2.5) {
  cat("La estadística Durbin-Watson indica posible autocorrelación en los residuos.\n")
} else {
  cat("No hay evidencia significativa de autocorrelación en los residuos.\n")
}

## No hay evidencia significativa de autocorrelación en los residuos.

EDA_F

Valeria Bustos

2024-11-01