###1. ENERGY
###1.1. Electricity
library(rio)
# Load the electricity table and inspect its columns; `kW` is read as text
# because the figures carry thousands separators.
electricity <- rio::import("Electricity.csv")
names(electricity)
## [1] "name" "slug" "kW"
## [4] "date_of_information" "ranking" "region"
str(electricity)
## 'data.frame': 213 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Japan" ...
## $ slug : chr "china" "united-states" "india" "japan" ...
## $ kW : chr "2,217,925,000" "1,143,266,000" "432,768,000" "348,666,000" ...
## $ date_of_information: int 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "East and Southeast Asia" ...
#install.packages("readr")
library(readr)
# Strip the grouping commas and store `kW` as a double.
# as.integer() is deliberately not applied afterwards: it yields NA for values
# above .Machine$integer.max (2147483647), which is why China's 2,217,925,000
# was being wiped out.
electricity[["kW"]] <- readr::parse_number(electricity[["kW"]])
str(electricity)
## 'data.frame': 213 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Japan" ...
## $ slug : chr "china" "united-states" "india" "japan" ...
## $ kW : num 2.22e+09 1.14e+09 4.33e+08 3.49e+08 2.76e+08 ...
## $ date_of_information: int 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "East and Southeast Asia" ...
###1.2. Refined_petroleum_products_production
# Load refined-petroleum production; `bbl/day` is read as comma-grouped text.
re_petroleum <- rio::import("Refined_petroleum_products_production.csv")
names(re_petroleum)
## [1] "name" "slug" "bbl/day"
## [4] "date_of_information" "ranking" "region"
str(re_petroleum)
## 'data.frame': 216 obs. of 6 variables:
## $ name : chr "United States" "China" "Russia" "India" ...
## $ slug : chr "united-states" "china" "russia" "india" ...
## $ bbl/day : chr "20,300,000" "11,510,000" "6,076,000" "4,897,000" ...
## $ date_of_information: int 2017 2015 2015 2015 2017 2017 2015 2015 2017 2017 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "East and Southeast Asia" "Central Asia" "South Asia" ...
# Convert the text figures to numeric in place.
re_petroleum[["bbl/day"]] <- readr::parse_number(re_petroleum[["bbl/day"]])
str(re_petroleum)
## 'data.frame': 216 obs. of 6 variables:
## $ name : chr "United States" "China" "Russia" "India" ...
## $ slug : chr "united-states" "china" "russia" "india" ...
## $ bbl/day : num 20300000 11510000 6076000 4897000 3467000 ...
## $ date_of_information: int 2017 2015 2015 2015 2017 2017 2015 2015 2017 2017 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "East and Southeast Asia" "Central Asia" "South Asia" ...
###1.3. Carbon dioxide emissions
# Load CO2 emissions; the measurement column is read as comma-grouped text.
carbon_emissions <- rio::import("Carbon_dioxide_emissions.csv")
names(carbon_emissions)
## [1] "name" "slug" "metric tonnes of CO2"
## [4] "date_of_information" "ranking" "region"
str(carbon_emissions)
## 'data.frame': 218 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Russia" ...
## $ slug : chr "china" "united-states" "india" "russia" ...
## $ metric tonnes of CO2: chr "10,773,248,000" "5,144,361,000" "2,314,738,000" "1,848,070,000" ...
## $ date_of_information : int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "Central Asia" ...
# Convert the text figures to numeric in place.
carbon_emissions[["metric tonnes of CO2"]] <-
  readr::parse_number(carbon_emissions[["metric tonnes of CO2"]])
str(carbon_emissions)
## 'data.frame': 218 obs. of 6 variables:
## $ name : chr "China" "United States" "India" "Russia" ...
## $ slug : chr "china" "united-states" "india" "russia" ...
## $ metric tonnes of CO2: num 1.08e+10 5.14e+09 2.31e+09 1.85e+09 1.10e+09 ...
## $ date_of_information : int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "South Asia" "Central Asia" ...
###1.4. Energy consumption per capita
# Load per-capita energy consumption; `Btu/person` is read as comma-grouped text.
energy_consumption <- rio::import("Energy_consumption_per_capita.csv")
names(energy_consumption)
## [1] "name" "slug" "Btu/person"
## [4] "date_of_information" "ranking" "region"
str(energy_consumption)
## 'data.frame': 212 obs. of 6 variables:
## $ name : chr "Qatar" "Singapore" "Bahrain" "United Arab Emirates" ...
## $ slug : chr "qatar" "singapore" "bahrain" "united-arab-emirates" ...
## $ Btu/person : chr "723,582,000" "639,951,000" "547,976,000" "471,788,000" ...
## $ date_of_information: int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Middle East" "East and Southeast Asia" "Middle East" "Middle East" ...
# Convert the text figures to numeric in place.
energy_consumption[["Btu/person"]] <-
  readr::parse_number(energy_consumption[["Btu/person"]])
str(energy_consumption)
## 'data.frame': 212 obs. of 6 variables:
## $ name : chr "Qatar" "Singapore" "Bahrain" "United Arab Emirates" ...
## $ slug : chr "qatar" "singapore" "bahrain" "united-arab-emirates" ...
## $ Btu/person : num 7.24e+08 6.40e+08 5.48e+08 4.72e+08 4.15e+08 ...
## $ date_of_information: int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Middle East" "East and Southeast Asia" "Middle East" "Middle East" ...
###2. COMMUNICATIONS
###2.1. Telephones_fixed_lines
# Load fixed-line telephone counts; `value` is read as comma-grouped text.
telephones_fixed <- rio::import("Telephones_fixed_lines.csv")
names(telephones_fixed)
## [1] "name" "slug" "value"
## [4] "date_of_information" "ranking" "region"
str(telephones_fixed)
## 'data.frame': 224 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Germany" ...
## $ slug : chr "china" "united-states" "japan" "germany" ...
## $ value : chr "179,414,000" "91,623,000" "60,721,000" "38,580,000" ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "Europe" ...
# Convert the text figures to numeric in place.
telephones_fixed[["value"]] <- readr::parse_number(telephones_fixed[["value"]])
str(telephones_fixed)
## 'data.frame': 224 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Germany" ...
## $ slug : chr "china" "united-states" "japan" "germany" ...
## $ value : num 1.79e+08 9.16e+07 6.07e+07 3.86e+07 3.77e+07 ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "Europe" ...
# dplyr is attached here; the sections below use its `%>%` and rename().
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Several tables share the generic column name `value`; give this one a
# table-specific name.
telephones_fixed <- dplyr::rename(telephones_fixed, value_telefixed = value)
###2.2. Telephones_mobile_cellular
# Load mobile-cellular subscriptions; `value` is read as comma-grouped text.
telephones_mobile <- rio::import("Telephones_mobile_cellular.csv")
names(telephones_mobile)
## [1] "name" "slug" "value"
## [4] "date_of_information" "ranking" "region"
str(telephones_mobile)
## 'data.frame': 225 obs. of 6 variables:
## $ name : chr "China" "India" "United States" "Indonesia" ...
## $ slug : chr "china" "india" "united-states" "indonesia" ...
## $ value : chr "1,781,000,000" "1,143,000,000" "372,682,000" "316,553,000" ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "South Asia" "North America" "East and Southeast Asia" ...
# Convert the text figures to numeric in place.
telephones_mobile[["value"]] <- readr::parse_number(telephones_mobile[["value"]])
str(telephones_mobile)
## 'data.frame': 225 obs. of 6 variables:
## $ name : chr "China" "India" "United States" "Indonesia" ...
## $ slug : chr "china" "india" "united-states" "indonesia" ...
## $ value : num 1.78e+09 1.14e+09 3.73e+08 3.17e+08 2.45e+08 ...
## $ date_of_information: int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "South Asia" "North America" "East and Southeast Asia" ...
# Replace the generic column name `value` with a table-specific one.
telephones_mobile <- dplyr::rename(telephones_mobile, value_telemobi = value)
###2.3. Broadband_fixed_subscriptions.csv
# Load fixed-broadband subscriptions; `value` is read as comma-grouped text.
# Note that `date_of_information` is read as character in this table.
broadband_fixed <- rio::import("Broadband_fixed_subscriptions.csv")
names(broadband_fixed)
## [1] "name" "slug" "value"
## [4] "date_of_information" "ranking" "region"
str(broadband_fixed)
## 'data.frame': 214 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Brazil" ...
## $ slug : chr "china" "united-states" "japan" "brazil" ...
## $ value : chr "483,549,500" "121,176,000" "44,000,791" "36,344,670" ...
## $ date_of_information: chr "2020" "2020" "2020" "2020" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "South America" ...
# Convert the text figures to numeric in place.
broadband_fixed[["value"]] <- readr::parse_number(broadband_fixed[["value"]])
str(broadband_fixed)
## 'data.frame': 214 obs. of 6 variables:
## $ name : chr "China" "United States" "Japan" "Brazil" ...
## $ slug : chr "china" "united-states" "japan" "brazil" ...
## $ value : num 4.84e+08 1.21e+08 4.40e+07 3.63e+07 3.62e+07 ...
## $ date_of_information: chr "2020" "2020" "2020" "2020" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "East and Southeast Asia" "North America" "East and Southeast Asia" "South America" ...
# Replace the generic column name `value` with a table-specific one.
broadband_fixed <- dplyr::rename(broadband_fixed, value_broad = value)
###3. ECONOMY
###3.1. Inflation Rate
# Load consumer-price inflation; the `%` column is read as text.
inflation_rate <- rio::import("Inflation rate (consumer prices).csv")
names(inflation_rate)
## [1] "name" "slug" "%"
## [4] "date_of_information" "ranking" "region"
str(inflation_rate)
## 'data.frame': 221 obs. of 6 variables:
## $ name : chr "South Sudan" "Andorra" "Dominica" "American Samoa" ...
## $ slug : chr "south-sudan" "andorra" "dominica" "american-samoa" ...
## $ % : chr "-6.69" "-0.9" "-0.73" "-0.5" ...
## $ date_of_information: chr "2022" "2015" "2020" "2015" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Africa" "Europe" "Central America and the Caribbean" "Australia and Oceania" ...
# Earlier attempt, kept for reference:
#inflation_rate$"%" <- parse_number(inflation_rate$"%", locale = locale(decimal_mark = ".", grouping_mark = " "))
#str(inflation_rate)
# Pre-clean the text before parsing: the pattern removes any comma that has no
# digit on either side and any period that is not followed by a digit.
inflation_clean <- gsub(
  "(?<!\\d),(?!\\d)|\\.(?!\\d)", "", inflation_rate[["%"]],
  perl = TRUE
)
inflation_rate[["%"]] <- readr::parse_number(inflation_clean)
str(inflation_rate)
## 'data.frame': 221 obs. of 6 variables:
## $ name : chr "South Sudan" "Andorra" "Dominica" "American Samoa" ...
## $ slug : chr "south-sudan" "andorra" "dominica" "american-samoa" ...
## $ % : num -6.69 -0.9 -0.73 -0.5 -0.4 -0.3 0 0 0.3 0.3 ...
## $ date_of_information: chr "2022" "2015" "2020" "2015" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Africa" "Europe" "Central America and the Caribbean" "Australia and Oceania" ...
# `%` is also the column name in the youth-unemployment table; give this one a
# table-specific name.
inflation_rate <- dplyr::rename(inflation_rate, "%_inflation" = "%")
###3.2. Youth unemployment rate (ages 15-24)
# Load youth unemployment; `%` is already numeric here, so no parsing is needed.
youth_unemployment <- rio::import("Youth_unemployment_rate(ages 15-24).csv")
names(youth_unemployment)
## [1] "name" "slug" "%"
## [4] "date_of_information" "ranking" "region"
str(youth_unemployment)
## 'data.frame': 203 obs. of 6 variables:
## $ name : chr "Djibouti" "South Africa" "Eswatini" "Libya" ...
## $ slug : chr "djibouti" "south-africa" "eswatini" "libya" ...
## $ % : num 79.9 64.2 50.9 50.5 48.8 45.4 42.3 42.2 41.2 41.1 ...
## $ date_of_information: int 2021 2021 2021 2021 2020 2021 2021 2020 2021 2021 ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Africa" "Africa" "Africa" "Africa" ...
# `%` is also the column name in the inflation table; give this one a
# table-specific name.
youth_unemployment <- dplyr::rename(youth_unemployment, "%_youth" = "%")
###3.3. Public debt
# Load public debt; `% of GDP` is already numeric, so it is only inspected.
public_debt <- rio::import("Public debt.csv")
names(public_debt)
## [1] "name" "slug" "% of GDP"
## [4] "date_of_information" "ranking" "region"
str(public_debt)
## 'data.frame': 210 obs. of 6 variables:
## $ name : chr "Greece" "Japan" "United Kingdom" "Singapore" ...
## $ slug : chr "greece" "japan" "united-kingdom" "singapore" ...
## $ % of GDP : num 237 216 185 154 147 ...
## $ date_of_information: chr "2021" "2021" "2021" "2021" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Europe" "East and Southeast Asia" "Europe" "East and Southeast Asia" ...
###3.4. Debt external
# Load external debt; `value` is read as text with a leading "$" and
# thousands separators.
debt_external <- rio::import("Debt_external.csv")
names(debt_external)
## [1] "name" "slug" "value"
## [4] "date_of_information" "ranking" "region"
str(debt_external)
## 'data.frame': 207 obs. of 6 variables:
## $ name : chr "United States" "United Kingdom" "France" "Germany" ...
## $ slug : chr "united-states" "united-kingdom" "france" "germany" ...
## $ value : chr "$20,275,951,000,000" "$8,722,000,000,000" "$6,356,000,000,000" "$5,671,463,000,000" ...
## $ date_of_information: chr "2019" "2019" "2019" "2019" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "Europe" "Europe" "Europe" ...
# Convert the currency text to numeric in place.
debt_external[["value"]] <- readr::parse_number(debt_external[["value"]])
str(debt_external)
## 'data.frame': 207 obs. of 6 variables:
## $ name : chr "United States" "United Kingdom" "France" "Germany" ...
## $ slug : chr "united-states" "united-kingdom" "france" "germany" ...
## $ value : num 2.03e+13 8.72e+12 6.36e+12 5.67e+12 4.35e+12 ...
## $ date_of_information: chr "2019" "2019" "2019" "2019" ...
## $ ranking : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "North America" "Europe" "Europe" "Europe" ...
# Replace the generic column name `value` with a table-specific one.
debt_external <- dplyr::rename(debt_external, "value_debt" = value)
###Trial join of two datasets
# Names present in the electricity table but absent from the external-debt table.
sort(setdiff(electricity[["name"]], debt_external[["name"]]))
## [1] "American Samoa"
## [2] "Antarctica"
## [3] "Cayman Islands"
## [4] "French Polynesia"
## [5] "Gaza Strip"
## [6] "Gibraltar"
## [7] "Guam"
## [8] "Saint Helena, Ascension, and Tristan da Cunha"
## [9] "Saint Pierre and Miquelon"
## [10] "South Sudan"
## [11] "Turks and Caicos Islands"
## [12] "Virgin Islands"
## [13] "Wake Island"
# Names present in the external-debt table but absent from the electricity table.
sort(setdiff(debt_external[["name"]], electricity[["name"]]))
## [1] "Andorra" "Anguilla"
## [3] "Liechtenstein" "Marshall Islands"
## [5] "Micronesia, Federated States of" "Palau"
## [7] "Wallis and Futuna"
# Join on `name`; with merge()'s defaults only names found in both tables are kept.
# NOTE(review): both inputs also carry slug, date_of_information, ranking and
# region, so merge() keeps both copies with .x/.y suffixes; confirm that this
# is intended before building on this result.
gaaaa <- merge(electricity, debt_external, by = "name")
rmarkdown::paged_table(gaaaa)