library(rio)
broadband = import("broadband.csv")
str(broadband)
## 'data.frame': 214 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Brazil" ...
## $ slug : chr "china" "united-states" "japan" "brazil" ...
## $ value : chr "483,549,500" "121,176,000" "44,000,791" "36,344,670" ...
## $ date_of_information: chr "2020" "2020" "2020" "2020" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "South America" ...
library(readr)
broadband$value <- parse_number(broadband$value)
str(broadband)
## 'data.frame': 214 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Brazil" ...
## $ slug : chr "china" "united-states" "japan" "brazil" ...
## $ value : num 4.84e+08 1.21e+08 4.40e+07 3.63e+07 3.62e+07 ...
## $ date_of_information: chr "2020" "2020" "2020" "2020" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "South America" ...
table(broadband$region)
##
## Africa Australia and Oceania
## 53 17
## Central America and the Caribbean Central Asia
## 27 6
## East and Southeast Asia Europe
## 19 47
## Middle East North America
## 19 5
## South America South Asia
## 13 8
colnames(broadband)[colnames(broadband) == "value"] <- "bband"
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
broadband = select(broadband, name, bband)
metric tonnes of co2
carboncito = import("carbon_emissions.csv")
str(carboncito)
## 'data.frame': 218 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Russia" ...
## $ slug : chr "china" "united-states" "india" "russia" ...
## $ metric tonnes of CO2: chr "10,773,248,000" "5,144,361,000" "2,314,738,000" "1,848,070,000" ...
## $ date_of_information : int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "Central Asia" ...
colnames(carboncito)[colnames(carboncito) == "metric tonnes of CO2"] <- "metrics"
carboncito$metrics <- parse_number(carboncito$metrics)
str(carboncito)
## 'data.frame': 218 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Russia" ...
## $ slug : chr "china" "united-states" "india" "russia" ...
## $ metrics : num 1.08e+10 5.14e+09 2.31e+09 1.85e+09 1.10e+09 ...
## $ date_of_information: int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "Central Asia" ...
colnames(carboncito)[colnames(carboncito) == "metrics"] <- "carbon_emi"
library(dplyr)
carboncito = select(carboncito, name, carbon_emi)
pafuera = import("debt_external.csv")
str(pafuera)
## 'data.frame': 207 obs. of 6 variables:
## $ name : chr "United States" "United Kingdom" "France" "Germany" ...
## $ slug : chr "united-states" "united-kingdom" "france" "germany" ...
## $ value : chr "$20,275,951,000,000" "$8,722,000,000,000" "$6,356,000,000,000" "$5,671,463,000,000" ...
## $ date_of_information: chr "2019" "2019" "2019" "2019" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "Europe" "Europe" "Europe" ...
pafuera$value <- parse_number(pafuera$value)
str(pafuera)
## 'data.frame': 207 obs. of 6 variables:
## $ name : chr "United States" "United Kingdom" "France" "Germany" ...
## $ slug : chr "united-states" "united-kingdom" "france" "germany" ...
## $ value : num 2.03e+13 8.72e+12 6.36e+12 5.67e+12 4.35e+12 ...
## $ date_of_information: chr "2019" "2019" "2019" "2019" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "Europe" "Europe" "Europe" ...
colnames(pafuera)[colnames(pafuera) == "value"] <- "externa_deuda"
library(dplyr)
pafuera = select(pafuera, name, externa_deuda)
Kw
pikachu = import("electricity.csv")
str(pikachu)
## 'data.frame': 213 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Japan" ...
## $ slug : chr "china" "united-states" "india" "japan" ...
## $ kW : chr "2,217,925,000" "1,143,266,000" "432,768,000" "348,666,000" ...
## $ date_of_information: int 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "East and Southeast Asia" ...
colnames(pikachu)[colnames(pikachu) == "kW"] <- "kw"
pikachu$kw <- parse_number(pikachu$kw)
str(pikachu)
## 'data.frame': 213 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Japan" ...
## $ slug : chr "china" "united-states" "india" "japan" ...
## $ kw : num 2.22e+09 1.14e+09 4.33e+08 3.49e+08 2.76e+08 ...
## $ date_of_information: int 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "East and Southeast Asia" ...
colnames(pikachu)[colnames(pikachu) == "kw"] <- "kw_electricity"
library(dplyr)
pikachu = select(pikachu, name, kw_electricity)
Btu/person
powerade = import("energy_compercap.csv")
str(powerade)
## 'data.frame': 212 obs. of 6 variables:
## $ name : chr "Qatar" "Singapore" "Bahrain" "United Arab Emirates" ...
## $ slug : chr "qatar" "singapore" "bahrain" "united-arab-emirates" ...
## $ Btu/person : chr "723,582,000" "639,951,000" "547,976,000" "471,788,000" ...
## $ date_of_information: int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Middle East" "East and Southeast Asia" "Middle East" "Middle East" ...
names(powerade)
## [1] "name" "slug" "Btu/person"
## [4] "date_of_information" "ranking" "region"
library(dplyr)
powerade <- powerade %>% rename(btu = `Btu/person`)
powerade$btu <- parse_number(powerade$btu)
str(powerade)
## 'data.frame': 212 obs. of 6 variables:
## $ name : chr "Qatar" "Singapore" "Bahrain" "United Arab Emirates" ...
## $ slug : chr "qatar" "singapore" "bahrain" "united-arab-emirates" ...
## $ btu : num 7.24e+08 6.40e+08 5.48e+08 4.72e+08 4.15e+08 ...
## $ date_of_information: int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Middle East" "East and Southeast Asia" "Middle East" "Middle East" ...
colnames(powerade)[colnames(powerade) == "btu"] <- "btuperson"
library(dplyr)
powerade = select(powerade, name, btuperson)
%
alangarcia = import("inflation.csv")
str(alangarcia)
## 'data.frame': 221 obs. of 6 variables:
## $ name : chr "South Sudan" "Andorra" "Dominica" "American Samoa" ...
## $ slug : chr "south-sudan" "andorra" "dominica" "american-samoa" ...
## $ % : chr "-6.69" "-0.9" "-0.73" "-0.5" ...
## $ date_of_information: chr "2022" "2015" "2020" "2015" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Africa" "Europe" "Central America and the Caribbean" "Australia and Oceania" ...
alangarcia <- alangarcia %>% rename(percentage = `%`)
alangarcia$percentage <- readr::parse_number(gsub("(?<!\\d),(?!\\d)|\\.(?!\\d)", "", alangarcia$percentage, perl = TRUE))
str(alangarcia)
## 'data.frame': 221 obs. of 6 variables:
## $ name : chr "South Sudan" "Andorra" "Dominica" "American Samoa" ...
## $ slug : chr "south-sudan" "andorra" "dominica" "american-samoa" ...
## $ percentage : num -6.69 -0.9 -0.73 -0.5 -0.4 -0.3 0 0 0.3 0.3 ...
## $ date_of_information: chr "2022" "2015" "2020" "2015" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Africa" "Europe" "Central America and the Caribbean" "Australia and Oceania" ...
colnames(alangarcia)[colnames(alangarcia) == "percentage"] <- "infla_percent"
library(dplyr)
alangarcia = select(alangarcia, name, infla_percent)
perapad = import("mobile_celular.csv")
str(perapad)
## 'data.frame': 225 obs. of 6 variables:
## $ name : chr "China" "India" "United States" "Indonesia" ...
## $ slug : chr "china" "india" "united-states" "indonesia" ...
## $ value : chr "1,781,000,000" "1,143,000,000" "372,682,000" "316,553,000" ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "South Asia" "North America" "East and Southeast Asia" ...
perapad$value <- parse_number(perapad$value)
str(perapad)
## 'data.frame': 225 obs. of 6 variables:
## $ name : chr "China" "India" "United States" "Indonesia" ...
## $ slug : chr "china" "india" "united-states" "indonesia" ...
## $ value : num 1.78e+09 1.14e+09 3.73e+08 3.17e+08 2.45e+08 ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "South Asia" "North America" "East and Southeast Asia" ...
colnames(perapad)[colnames(perapad) == "value"] <- "mobile_cellular"
library(dplyr)
perapad = select(perapad, name, mobile_cellular)
BBL/DAY
derrame = import("petroleum_production.csv")
str(derrame)
## 'data.frame': 216 obs. of 6 variables:
## $ name : chr "United States" "China" "Russia" "India" ...
## $ slug : chr "united-states" "china" "russia" "india" ...
## $ bbl/day : chr "20,300,000" "11,510,000" "6,076,000" "4,897,000" ...
## $ date_of_information: int 2017 2015 2015 2015 2017 2017 2015 2015 2017 2017 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "East and Southeast Asia" "Central Asia" "South Asia" ...
derrame <- derrame %>% rename(bbl = `bbl/day`)
derrame$bbl <- parse_number(derrame$bbl)
str(derrame)
## 'data.frame': 216 obs. of 6 variables:
## $ name : chr "United States" "China" "Russia" "India" ...
## $ slug : chr "united-states" "china" "russia" "india" ...
## $ bbl : num 20300000 11510000 6076000 4897000 3467000 ...
## $ date_of_information: int 2017 2015 2015 2015 2017 2017 2015 2015 2017 2017 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "East and Southeast Asia" "Central Asia" "South Asia" ...
colnames(derrame)[colnames(derrame) == "bbl"] <- "barrels_pd"
library(dplyr)
derrame = select(derrame, name, barrels_pd)
% OF GDP
morosos = import("public_debt.csv")
str(morosos)
## 'data.frame': 210 obs. of 6 variables:
## $ name : chr "Greece" "Japan" "United Kingdom" "Singapore" ...
## $ slug : chr "greece" "japan" "united-kingdom" "singapore" ...
## $ % of GDP : num 237 216 185 154 147 ...
## $ date_of_information: chr "2021" "2021" "2021" "2021" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Europe" "East and Southeast Asia" "Europe" "East and Southeast Asia" ...
morosos <- morosos %>% rename(gdp = `% of GDP`)
colnames(morosos)[colnames(morosos) == "gdp"] <- "publicdebt_percent"
library(dplyr)
morosos = select(morosos, name, publicdebt_percent)
cablemagico = import("telephone_fixedlines.csv")
str(cablemagico)
## 'data.frame': 224 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Germany" ...
## $ slug : chr "china" "united-states" "japan" "germany" ...
## $ value : chr "179,414,000" "91,623,000" "60,721,000" "38,580,000" ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "Europe" ...
cablemagico$value <- parse_number(cablemagico$value)
str(cablemagico)
## 'data.frame': 224 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Germany" ...
## $ slug : chr "china" "united-states" "japan" "germany" ...
## $ value : num 1.79e+08 9.16e+07 6.07e+07 3.86e+07 3.77e+07 ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "Europe" ...
colnames(cablemagico)[colnames(cablemagico) == "value"] <- "fixedlines"
library(dplyr)
cablemagico = select(cablemagico, name, fixedlines)
%
otarola = import("youth_unemployed.csv")
otarola <- otarola %>% rename(percent = `%`)
str(otarola)
## 'data.frame': 203 obs. of 6 variables:
## $ name : chr "Djibouti" "South Africa" "Eswatini" "Libya" ...
## $ slug : chr "djibouti" "south-africa" "eswatini" "libya" ...
## $ percent : num 79.9 64.2 50.9 50.5 48.8 45.4 42.3 42.2 41.2 41.1 ...
## $ date_of_information: int 2021 2021 2021 2021 2020 2021 2021 2020 2021 2021 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Africa" "Africa" "Africa" "Africa" ...
colnames(otarola)[colnames(otarola) == "percent"] <- "unemployed_pc"
library(dplyr)
otarola = select(otarola, name, unemployed_pc)
library(dplyr)
library(purrr)
dataza <- list(alangarcia, broadband, cablemagico, carboncito, derrame, morosos, otarola, pafuera, perapad, pikachu, powerade)
dataza <- reduce(dataza, full_join, by = "name")
datareg <- dataza[complete.cases(dataza), ]
hp1 = formula(publicdebt_percent ~ bband + fixedlines + carbon_emi + barrels_pd + mobile_cellular + kw_electricity + btuperson)
reg_gauss=lm(hp1, data= datareg)
summary(reg_gauss)
##
## Call:
## lm(formula = hp1, data = datareg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -71.591 -21.396 -4.417 14.348 175.262
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.286e+01 3.515e+00 15.040 < 2e-16 ***
## bband -5.609e-07 7.054e-07 -0.795 0.42760
## fixedlines 2.126e-06 6.689e-07 3.178 0.00176 **
## carbon_emi -6.824e-08 3.576e-08 -1.908 0.05802 .
## barrels_pd -3.192e-06 7.091e-06 -0.450 0.65324
## mobile_cellular -2.168e-08 4.804e-08 -0.451 0.65243
## kw_electricity 3.113e-07 1.991e-07 1.564 0.11974
## btuperson 1.480e-08 2.367e-08 0.625 0.53272
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34.17 on 172 degrees of freedom
## Multiple R-squared: 0.1848, Adjusted R-squared: 0.1516
## F-statistic: 5.568 on 7 and 172 DF, p-value: 8.502e-06
hp2 = formula(externa_deuda ~ bband + fixedlines + carbon_emi + barrels_pd + mobile_cellular + kw_electricity + btuperson)
reg_gauss=lm(hp2, data= datareg)
summary(reg_gauss)
##
## Call:
## lm(formula = hp2, data = datareg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.089e+12 -1.956e+11 -6.946e+10 3.159e+09 6.394e+12
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.165e+10 9.251e+10 0.450 0.653131
## bband -4.343e+04 1.857e+04 -2.339 0.020464 *
## fixedlines 5.718e+04 1.760e+04 3.248 0.001398 **
## carbon_emi -3.699e+03 9.412e+02 -3.930 0.000123 ***
## barrels_pd 5.459e+05 1.866e+05 2.925 0.003910 **
## mobile_cellular -4.335e+03 1.264e+03 -3.429 0.000759 ***
## kw_electricity 2.442e+04 5.240e+03 4.660 6.31e-06 ***
## btuperson 1.068e+03 6.231e+02 1.715 0.088221 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.994e+11 on 172 degrees of freedom
## Multiple R-squared: 0.7772, Adjusted R-squared: 0.7681
## F-statistic: 85.72 on 7 and 172 DF, p-value: < 2.2e-16
hp3 = formula(infla_percent ~ bband + fixedlines + carbon_emi + barrels_pd + mobile_cellular + kw_electricity + btuperson)
reg_gauss=lm(hp3, data= datareg)
summary(reg_gauss)
##
## Call:
## lm(formula = hp3, data = datareg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6552 -885 -737 -685 144275
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.337e+02 1.139e+03 0.644 0.520
## bband 1.782e-04 2.285e-04 0.780 0.437
## fixedlines -1.511e-04 2.167e-04 -0.697 0.487
## carbon_emi -9.631e-06 1.158e-05 -0.831 0.407
## barrels_pd 2.049e-03 2.297e-03 0.892 0.374
## mobile_cellular 8.334e-06 1.556e-05 0.536 0.593
## kw_electricity 1.681e-06 6.450e-05 0.026 0.979
## btuperson 8.727e-08 7.669e-06 0.011 0.991
##
## Residual standard error: 11070 on 172 degrees of freedom
## Multiple R-squared: 0.006893, Adjusted R-squared: -0.03352
## F-statistic: 0.1705 on 7 and 172 DF, p-value: 0.9908
hp4 = formula(unemployed_pc ~ bband + fixedlines + carbon_emi + barrels_pd + mobile_cellular + kw_electricity + btuperson)
reg_gauss=lm(hp4, data= datareg)
summary(reg_gauss)
##
## Call:
## lm(formula = hp4, data = datareg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.963 -9.271 -2.473 7.929 58.864
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.117e+01 1.292e+00 16.394 <2e-16 ***
## bband -2.409e-07 2.592e-07 -0.929 0.3540
## fixedlines 6.035e-11 2.458e-07 0.000 0.9998
## carbon_emi 9.674e-09 1.314e-08 0.736 0.4627
## barrels_pd -2.153e-06 2.606e-06 -0.826 0.4099
## mobile_cellular -1.286e-08 1.766e-08 -0.728 0.4675
## kw_electricity 2.285e-08 7.317e-08 0.312 0.7552
## btuperson -1.526e-08 8.700e-09 -1.755 0.0811 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.56 on 172 degrees of freedom
## Multiple R-squared: 0.02942, Adjusted R-squared: -0.01009
## F-statistic: 0.7447 on 7 and 172 DF, p-value: 0.6344
```