# Setup ----
# The global environment is deliberately NOT cleared here (no
# `rm(list = ls())`): sourcing this script must never delete objects the
# caller already has in their session. Every object read by the code visible
# in this script is assigned by the script itself before use, so leftover
# workspace contents do not feed into the results. For a guaranteed clean
# run, start a fresh R session instead.

# Load libraries
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.2
## Warning: package 'readr' was built under R version 4.4.2
## Warning: package 'dplyr' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# CSV files
# The folder holding the three CSV exports can be overridden through the
# ENERGY_DATA_DIR environment variable. When that variable is unset or empty
# the original download folder is used, so existing runs keep working.
default_data_dir <- "C:\\Users\\Leonardo\\Downloads"
data_dir <- Sys.getenv("ENERGY_DATA_DIR", unset = default_data_dir)
if (!nzchar(data_dir)) {
  data_dir <- default_data_dir
}

# file.path() joins components with "/", which R also accepts on Windows.
file_energy <- file.path(data_dir, "Energy consumption per capita.csv")
file_emissions <- file.path(data_dir, "Carbon dioxide emissions.csv")
file_capacity <- file.path(
  data_dir,
  "Electricity - installed generating capacity.csv"
)

# Load datasets
# Energy table: read the CSV at `file_energy`, then keep only the `name`
# column and the `Btu/person` column, the latter renamed to Btu_per_person.
energy <- file_energy %>%
  read_csv() %>%
  select(name, Btu_per_person = `Btu/person`)
## Rows: 195 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): Btu/person
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Emissions table: read the CSV at `file_emissions`, then keep only the `name`
# column and `metric tonnes of CO2`, the latter renamed to CO2_emissions.
emissions <- file_emissions %>%
  read_csv() %>%
  select(name, CO2_emissions = `metric tonnes of CO2`)
## Rows: 218 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): metric tonnes of CO2
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Capacity table: read the CSV at `file_capacity`, then keep only the `name`
# column and the `kW` column, the latter renamed to Installed_kW.
capacity <- file_capacity %>%
  read_csv() %>%
  select(name, Installed_kW = `kW`)
## Rows: 211 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): kW
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the structure (dimensions, column names and column types) of each of
# the three loaded tables. The `##` lines after each call appear to be the
# console output recorded when the script was rendered; they are comments and
# are not executed.
str(energy)
## tibble [195 × 2] (S3: tbl_df/tbl/data.frame)
##  $ name          : chr [1:195] "Qatar" "Singapore" "Bahrain" "United Arab Emirates" ...
##  $ Btu_per_person: num [1:195] 7.67e+08 6.54e+08 5.14e+08 4.96e+08 4.66e+08 ...
str(emissions)
## tibble [218 × 2] (S3: tbl_df/tbl/data.frame)
##  $ name         : chr [1:218] "China" "United States" "India" "Russia" ...
##  $ CO2_emissions: num [1:218] 1.35e+10 4.94e+09 2.80e+09 1.84e+09 1.05e+09 ...
str(capacity)
## tibble [211 × 2] (S3: tbl_df/tbl/data.frame)
##  $ name        : chr [1:211] "China" "United States" "India" "Japan" ...
##  $ Installed_kW: num [1:211] 2.59e+09 1.20e+09 4.87e+08 3.50e+08 3.01e+08 ...
# Merge the three datasets on the `name` column.
# inner_join() keeps only the names that are present in all three tables.
# `relationship = "one-to-one"` makes each join stop with an error if a name
# occurs more than once in either input, instead of silently multiplying rows
# (which would distort every regression fitted on `merged_data`).
merged_data <- energy %>%
  inner_join(emissions, by = "name", relationship = "one-to-one") %>%
  inner_join(capacity, by = "name", relationship = "one-to-one")

# Check the structure of the merged data frame, then print its first rows.
# The `##` lines after each call appear to be the console output recorded
# when the script was rendered; they are comments and are not executed.
str(merged_data)
## tibble [195 × 4] (S3: tbl_df/tbl/data.frame)
##  $ name          : chr [1:195] "Qatar" "Singapore" "Bahrain" "United Arab Emirates" ...
##  $ Btu_per_person: num [1:195] 7.67e+08 6.54e+08 5.14e+08 4.96e+08 4.66e+08 ...
##  $ CO2_emissions : num [1:195] 1.22e+08 2.42e+08 4.33e+07 2.68e+08 1.22e+07 ...
##  $ Installed_kW  : num [1:195] 11414000 12538000 6983000 39915000 1265000 ...
head(merged_data)
## # A tibble: 6 × 4
##   name                 Btu_per_person CO2_emissions Installed_kW
##   <chr>                         <dbl>         <dbl>        <dbl>
## 1 Qatar                     767202000     122122000     11414000
## 2 Singapore                 653844000     241710000     12538000
## 3 Bahrain                   514320000      43343000      6983000
## 4 United Arab Emirates      496365000     268041000     39915000
## 5 Brunei                    466111000      12172000      1265000
## 6 Kuwait                    402030000     100596000     20250000
# Simulate inflation figures for the example: one normal draw per row of
# `merged_data` (mean 5, sd 2). The seed is fixed at 42 so the draws are
# reproducible. These values are synthetic, not observed data.
set.seed(42)
merged_data <- mutate(
  merged_data,
  Inflacion = rnorm(nrow(merged_data), mean = 5, sd = 2)
)

# Linear regression of the simulated inflation on the three energy indicators
modelo_inflacion <- lm(
  formula = Inflacion ~ Btu_per_person + CO2_emissions + Installed_kW,
  data = merged_data
)
summary(modelo_inflacion)
## 
## Call:
## lm(formula = Inflacion ~ Btu_per_person + CO2_emissions + Installed_kW, 
##     data = merged_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.9038 -1.2078  0.1157  1.3172  5.5788 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4.733e+00  1.731e-01  27.349   <2e-16 ***
## Btu_per_person  2.073e-09  1.277e-09   1.623    0.106    
## CO2_emissions  -1.002e-09  9.785e-10  -1.024    0.307    
## Installed_kW    4.659e-09  4.918e-09   0.947    0.345    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.942 on 191 degrees of freedom
## Multiple R-squared:  0.01975,    Adjusted R-squared:  0.004354 
## F-statistic: 1.283 on 3 and 191 DF,  p-value: 0.2816
# Simulate public-debt figures for the example: one normal draw per row of
# `merged_data` (mean 50, sd 10). No seed is set in this step, so the values
# depend on the state of the random-number generator when it runs. These
# values are synthetic, not observed data.
merged_data <- mutate(
  merged_data,
  Deuda_Publica = rnorm(nrow(merged_data), mean = 50, sd = 10)
)

# Linear regression of the simulated debt on the three energy indicators
modelo_deuda <- lm(
  formula = Deuda_Publica ~ Btu_per_person + CO2_emissions + Installed_kW,
  data = merged_data
)
summary(modelo_deuda)
## 
## Call:
## lm(formula = Deuda_Publica ~ Btu_per_person + CO2_emissions + 
##     Installed_kW, data = merged_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.368  -7.343  -0.446   6.838  24.411 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4.993e+01  8.584e-01  58.164   <2e-16 ***
## Btu_per_person  6.471e-09  6.335e-09   1.021    0.308    
## CO2_emissions  -3.761e-11  4.854e-09  -0.008    0.994    
## Installed_kW   -5.108e-09  2.439e-08  -0.209    0.834    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.632 on 191 degrees of freedom
## Multiple R-squared:  0.01667,    Adjusted R-squared:  0.001226 
## F-statistic: 1.079 on 3 and 191 DF,  p-value: 0.359
# Simulate youth-unemployment figures for the example: one normal draw per
# row of `merged_data` (mean 20, sd 5). No seed is set in this step, so the
# values depend on the state of the random-number generator when it runs.
# These values are synthetic, not observed data.
merged_data <- mutate(
  merged_data,
  Desempleo_Juvenil = rnorm(nrow(merged_data), mean = 20, sd = 5)
)

# Linear regression of the simulated unemployment on the three energy
# indicators
modelo_desempleo <- lm(
  formula = Desempleo_Juvenil ~ Btu_per_person + CO2_emissions + Installed_kW,
  data = merged_data
)
summary(modelo_desempleo)
## 
## Call:
## lm(formula = Desempleo_Juvenil ~ Btu_per_person + CO2_emissions + 
##     Installed_kW, data = merged_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.0752  -2.9448  -0.0071   3.2542  16.3996 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.972e+01  4.603e-01  42.842   <2e-16 ***
## Btu_per_person  3.243e-10  3.397e-09   0.095    0.924    
## CO2_emissions  -3.030e-09  2.603e-09  -1.164    0.246    
## Installed_kW    1.487e-08  1.308e-08   1.137    0.257    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.165 on 191 degrees of freedom
## Multiple R-squared:  0.007178,   Adjusted R-squared:  -0.008416 
## F-statistic: 0.4603 on 3 and 191 DF,  p-value: 0.7103