# NOTE: the global workspace is deliberately not wiped here. Removing every
# object with rm() deletes whatever the caller had in the session, yet leaves
# attached packages and options untouched, so it does not give a clean state.
# Run this script in a fresh R session instead.
# Cargar librerías
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.2
## Warning: package 'readr' was built under R version 4.4.2
## Warning: package 'dplyr' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# CSV input files.
# The directory that holds the downloads can be overridden through the
# ENERGY_DATA_DIR environment variable, so the script is not tied to a single
# machine. When the variable is unset or empty, the original location is used.
data_dir <- Sys.getenv("ENERGY_DATA_DIR", unset = "")
if (!nzchar(data_dir)) {
  data_dir <- "C:\\Users\\Leonardo\\Downloads"
}
file_energy <- file.path(data_dir, "Energy consumption per capita.csv")
file_emissions <- file.path(data_dir, "Carbon dioxide emissions.csv")
file_capacity <- file.path(
  data_dir,
  "Electricity - installed generating capacity.csv"
)
# Load the datasets.
# Energy consumption: keep the `name` column and the `Btu/person` measure,
# renamed to `Btu_per_person`.
energy <- select(
  read_csv(file_energy),
  name,
  Btu_per_person = `Btu/person`
)
## Rows: 195 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): Btu/person
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# CO2 emissions: keep the `name` column and the `metric tonnes of CO2`
# measure, renamed to `CO2_emissions`.
emissions <- select(
  read_csv(file_emissions),
  name,
  CO2_emissions = `metric tonnes of CO2`
)
## Rows: 218 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): metric tonnes of CO2
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Installed generating capacity: keep the `name` column and the `kW`
# measure, renamed to `Installed_kW`.
capacity <- select(
  read_csv(file_capacity),
  name,
  Installed_kW = `kW`
)
## Rows: 211 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, slug, region
## dbl (2): date_of_information, ranking
## num (1): kW
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the structure (dimensions, column names and types) of each dataset
str(object = energy)
## tibble [195 × 2] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:195] "Qatar" "Singapore" "Bahrain" "United Arab Emirates" ...
## $ Btu_per_person: num [1:195] 7.67e+08 6.54e+08 5.14e+08 4.96e+08 4.66e+08 ...
str(object = emissions)
## tibble [218 × 2] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:218] "China" "United States" "India" "Russia" ...
## $ CO2_emissions: num [1:218] 1.35e+10 4.94e+09 2.80e+09 1.84e+09 1.05e+09 ...
str(object = capacity)
## tibble [211 × 2] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:211] "China" "United States" "India" "Japan" ...
## $ Installed_kW: num [1:211] 2.59e+09 1.20e+09 4.87e+08 3.50e+08 3.01e+08 ...
# Merge the three datasets on the shared `name` column.
# An inner join keeps only the names present in all three tables.
# `relationship = "one-to-one"` makes the join fail loudly if a name is
# duplicated in either table, instead of silently multiplying rows that
# would then be counted more than once in the regressions further down.
merged_data <- energy %>%
  inner_join(emissions, by = "name", relationship = "one-to-one") %>%
  inner_join(capacity, by = "name", relationship = "one-to-one")
# Check the merged data frame: column types first, then the first six rows
str(object = merged_data)
## tibble [195 × 4] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:195] "Qatar" "Singapore" "Bahrain" "United Arab Emirates" ...
## $ Btu_per_person: num [1:195] 7.67e+08 6.54e+08 5.14e+08 4.96e+08 4.66e+08 ...
## $ CO2_emissions : num [1:195] 1.22e+08 2.42e+08 4.33e+07 2.68e+08 1.22e+07 ...
## $ Installed_kW : num [1:195] 11414000 12538000 6983000 39915000 1265000 ...
head(x = merged_data, n = 6L)
## # A tibble: 6 × 4
## name Btu_per_person CO2_emissions Installed_kW
## <chr> <dbl> <dbl> <dbl>
## 1 Qatar 767202000 122122000 11414000
## 2 Singapore 653844000 241710000 12538000
## 3 Bahrain 514320000 43343000 6983000
## 4 United Arab Emirates 496365000 268041000 39915000
## 5 Brunei 466111000 12172000 1265000
## 6 Kuwait 402030000 100596000 20250000
# Simulated inflation, for illustration only: one random draw per row from a
# normal distribution (mean 5, sd 2). The seed makes the draws reproducible.
set.seed(42)
merged_data$Inflacion <- rnorm(nrow(merged_data), mean = 5, sd = 2)
# Linear regression of the simulated inflation on the three energy measures
modelo_inflacion <- lm(
  formula = Inflacion ~ Btu_per_person + CO2_emissions + Installed_kW,
  data = merged_data
)
summary(modelo_inflacion)
##
## Call:
## lm(formula = Inflacion ~ Btu_per_person + CO2_emissions + Installed_kW,
## data = merged_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.9038 -1.2078 0.1157 1.3172 5.5788
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.733e+00 1.731e-01 27.349 <2e-16 ***
## Btu_per_person 2.073e-09 1.277e-09 1.623 0.106
## CO2_emissions -1.002e-09 9.785e-10 -1.024 0.307
## Installed_kW 4.659e-09 4.918e-09 0.947 0.345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.942 on 191 degrees of freedom
## Multiple R-squared: 0.01975, Adjusted R-squared: 0.004354
## F-statistic: 1.283 on 3 and 191 DF, p-value: 0.2816
# Simulated public debt, for illustration only: one random draw per row from
# a normal distribution (mean 50, sd 10). No seed is set here, so the draws
# continue the random-number stream started earlier in the script.
merged_data$Deuda_Publica <- rnorm(nrow(merged_data), mean = 50, sd = 10)
# Linear regression of the simulated public debt on the three energy measures
modelo_deuda <- lm(
  formula = Deuda_Publica ~ Btu_per_person + CO2_emissions + Installed_kW,
  data = merged_data
)
summary(modelo_deuda)
##
## Call:
## lm(formula = Deuda_Publica ~ Btu_per_person + CO2_emissions +
## Installed_kW, data = merged_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.368 -7.343 -0.446 6.838 24.411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.993e+01 8.584e-01 58.164 <2e-16 ***
## Btu_per_person 6.471e-09 6.335e-09 1.021 0.308
## CO2_emissions -3.761e-11 4.854e-09 -0.008 0.994
## Installed_kW -5.108e-09 2.439e-08 -0.209 0.834
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.632 on 191 degrees of freedom
## Multiple R-squared: 0.01667, Adjusted R-squared: 0.001226
## F-statistic: 1.079 on 3 and 191 DF, p-value: 0.359
# Simulated youth unemployment, for illustration only: one random draw per
# row from a normal distribution (mean 20, sd 5). No seed is set here, so the
# draws continue the random-number stream started earlier in the script.
merged_data$Desempleo_Juvenil <- rnorm(nrow(merged_data), mean = 20, sd = 5)
# Linear regression of the simulated youth unemployment on the three energy
# measures
modelo_desempleo <- lm(
  formula = Desempleo_Juvenil ~ Btu_per_person + CO2_emissions + Installed_kW,
  data = merged_data
)
summary(modelo_desempleo)
##
## Call:
## lm(formula = Desempleo_Juvenil ~ Btu_per_person + CO2_emissions +
## Installed_kW, data = merged_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.0752 -2.9448 -0.0071 3.2542 16.3996
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.972e+01 4.603e-01 42.842 <2e-16 ***
## Btu_per_person 3.243e-10 3.397e-09 0.095 0.924
## CO2_emissions -3.030e-09 2.603e-09 -1.164 0.246
## Installed_kW 1.487e-08 1.308e-08 1.137 0.257
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.165 on 191 degrees of freedom
## Multiple R-squared: 0.007178, Adjusted R-squared: -0.008416
## F-statistic: 0.4603 on 3 and 191 DF, p-value: 0.7103