# Point the R session at the folder that holds the input files, then list
# that folder's contents.
# NOTE(review): this is a hard-coded absolute path, so the script only runs on
# a machine where this exact folder exists — consider a project-relative path.
setwd("C:/Users/HP/Desktop/para puntos")
list.files()
## [1] "Area.csv"
## [2] "Broadband - fixed subscriptions.csv"
## [3] "Carbon dioxide emissions.csv"
## [4] "Debt - external.csv"
## [5] "Electricity - installed generating capacity.csv"
## [6] "Energy consumption per capita.csv"
## [7] "Inflation rate (consumer prices).csv"
## [8] "Public debt.csv"
## [9] "Refined petroleum products - production.csv"
## [10] "semana13.html"
## [11] "semana13.Rmd"
## [12] "Telephones - fixed lines.csv"
## [13] "Telephones - mobile cellular.csv"
## [14] "Youth unemployment rate (ages 15-24).csv"
library(rio)
# Read each indicator table from its CSV file in the working directory.
area           <- import("Area.csv")
broadband      <- import("Broadband - fixed subscriptions.csv")
carbond        <- import("Carbon dioxide emissions.csv")
debt           <- import("Debt - external.csv")
electricity    <- import("Electricity - installed generating capacity.csv")
energycon      <- import("Energy consumption per capita.csv")
inflationrate  <- import("Inflation rate (consumer prices).csv")
publicd        <- import("Public debt.csv")
refinedpetro   <- import("Refined petroleum products - production.csv")
telefonofixed  <- import("Telephones - fixed lines.csv")
telefonomobile <- import("Telephones - mobile cellular.csv")
jovenes        <- import("Youth unemployment rate (ages 15-24).csv")

# Names of the objects created above, kept as strings so they can be looked
# up by name later.
nombres_dataframes <- c(
  "area", "broadband", "carbond", "debt",
  "electricity", "energycon", "inflationrate", "publicd",
  "refinedpetro", "telefonofixed", "telefonomobile", "jovenes"
)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Rename measurement columns in five of the tables (presumably so that the
# generic "value" / "%" headers stay distinguishable after merging).
debt <- rename(debt, valor = value)
jovenes <- rename(jovenes, edadesprom = "%")
telefonofixed <- rename(telefonofixed, valorfixed = value)
telefonomobile <- rename(telefonomobile, valormobile = value)
area <- rename(area, sq_km = "sq km")

# Name of the key column.
variable_comun <- "name"

# Show the column names of the refined-petroleum table.
names(refinedpetro)
## [1] "name" "slug" "bbl/day"
## [4] "date_of_information" "ranking" "region"
# Collect the imported tables into a named list, looked up by object name.
lista_dataframes <- mget(nombres_dataframes)

# From every table keep only the key column plus its single measurement.
areavar  <- area[c("name", "sq_km")]
broadvar <- broadband[c("name", "value")]
carbvar  <- carbond[c("name", "metric tonnes of CO2")]
debtvar  <- debt[c("name", "valor")]
electvar <- electricity[c("name", "kW")]
enervar  <- energycon[c("name", "Btu/person")]
inflavar <- inflationrate[c("name", "%")]
jovevar  <- jovenes[c("name", "edadesprom")]
pubvar   <- publicd[c("name", "% of GDP")]
refivar  <- refinedpetro[c("name", "bbl/day")]
fixvar   <- telefonofixed[c("name", "valorfixed")]
mobvar   <- telefonomobile[c("name", "valormobile")]

# Two-column tables in the order in which they will be merged.
data_frames <- list(
  areavar, broadvar, carbvar, debtvar, electvar, enervar,
  inflavar, jovevar, pubvar, refivar, fixvar, mobvar
)
# Full outer join of every two-column table on "name".
# Reduce() folds merge() over the list from left to right, so it also works
# when the list holds a single table (where `2:length(x)` would iterate 2, 1
# and fail on a missing element).
combined_base <- Reduce(
  function(acumulado, siguiente) {
    merge(acumulado, siguiente, by = "name", all = TRUE)
  },
  data_frames
)

# Keep only the rows that have a value in every column.
base_limpia <- na.omit(combined_base)

# Show the structure of the cleaned table.
str(base_limpia)
## 'data.frame': 180 obs. of 13 variables:
## $ name : chr "Afghanistan" "Albania" "Algeria" "Angola" ...
## $ sq_km : chr "652,230" "28,748" "2,381,740" "1,246,700" ...
## $ value : chr "26,570" "508,937" "3,790,459" "230,610" ...
## $ metric tonnes of CO2: chr "7,893,000" "3,794,000" "151,633,000" "19,362,000" ...
## $ valor : chr "$284,000,000" "$9,311,000,000" "$5,574,000,000" "$42,080,000,000" ...
## $ kW : chr "776,000" "2,531,000" "21,694,000" "7,344,000" ...
## $ Btu/person : chr "3,227,000" "38,442,000" "61,433,000" "11,693,000" ...
## $ % : chr "2.3" "6.73" "9.27" "25.75" ...
## $ edadesprom : num 20.2 27.8 31.9 18.5 29.9 36.1 10.8 11.4 16.5 30.8 ...
## $ % of GDP : num 7 82.4 27.5 65 57.6 ...
## $ bbl/day : chr "0" "5,638" "627,900" "53,480" ...
## $ valorfixed : chr "146,000" "177,000" "5,576,000" "94,000" ...
## $ valormobile : chr "22,678,000" "2,782,000" "49,019,000" "23,978,000" ...
## - attr(*, "na.action")= 'omit' Named int [1:76] 2 5 6 8 9 10 13 14 31 33 ...
## ..- attr(*, "names")= chr [1:76] "2" "5" "6" "8" ...
# Strip the thousands separators from `value` and store it as a number.
base_limpia[["valuenumer"]] <- as.numeric(
  gsub(",", "", base_limpia[["value"]])
)

# Report whether the conversion left any NA in the new numeric column.
print(
  if (anyNA(base_limpia[["valuenumer"]])) {
    "¡Hay valores NA en la variable numérica después de la conversión!"
  } else {
    "La conversión fue exitosa."
  }
)
## [1] "La conversión fue exitosa."
# Strip the thousands separators from `sq_km` and store it as a number.
base_limpia[["sq_kmnum"]] <- as.numeric(
  gsub(",", "", base_limpia[["sq_km"]])
)

# Report whether the conversion left any NA in the new numeric column.
print(
  if (anyNA(base_limpia[["sq_kmnum"]])) {
    "¡Hay valores NA en la variable numérica después de la conversión!"
  } else {
    "La conversión fue exitosa."
  }
)
## [1] "La conversión fue exitosa."
# Shorten the CO2 column name, then parse its comma-grouped figures into a
# numeric column.
base_limpia <- base_limpia %>% rename(CO2 = "metric tonnes of CO2")
base_limpia$co2num <- as.numeric(gsub(",", "", base_limpia$CO2))
# Check the converted column (co2num), not the source column: CO2 is a
# character column that is already NA-free after na.omit(), so testing it
# could never detect a failed conversion.
if (any(is.na(base_limpia$co2num))) {
  print("¡Hay valores NA en la variable numérica después de la conversión!")
} else {
  print("La conversión fue exitosa.")
}
## [1] "La conversión fue exitosa."
# Remove the dollar signs and thousands separators from `valor` and store the
# result as a number.
base_limpia[["valornum"]] <- as.numeric(
  gsub("\\$|,", "", base_limpia[["valor"]])
)

# Show the column names after adding the numeric columns.
colnames(base_limpia)
## [1] "name" "sq_km" "value" "CO2" "valor"
## [6] "kW" "Btu/person" "%" "edadesprom" "% of GDP"
## [11] "bbl/day" "valorfixed" "valormobile" "valuenumer" "sq_kmnum"
## [16] "co2num" "valornum"