# Untitled

# Make the folder that holds the CSV inputs the working directory, so the
# bare file names passed to import() further down resolve against it.
# NOTE(review): this is a machine-specific absolute path; the script fails on
# any computer where this folder does not exist. Consider a project-relative
# path instead.
setwd("C:/Users/HP/Desktop/para puntos")

# List the contents of the working directory to confirm the inputs are there.
list.files()

##  [1] "Area.csv"                                       
##  [2] "Broadband - fixed subscriptions.csv"            
##  [3] "Carbon dioxide emissions.csv"                   
##  [4] "Debt - external.csv"                            
##  [5] "Electricity - installed generating capacity.csv"
##  [6] "Energy consumption per capita.csv"              
##  [7] "Inflation rate (consumer prices).csv"           
##  [8] "Public debt.csv"                                
##  [9] "Refined petroleum products - production.csv"    
## [10] "semana13.html"                                  
## [11] "semana13.Rmd"                                   
## [12] "Telephones - fixed lines.csv"                   
## [13] "Telephones - mobile cellular.csv"               
## [14] "Youth unemployment rate (ages 15-24).csv"

library(rio)
# Read every CSV in the working directory into its own object.
area <- import("Area.csv")
broadband <- import("Broadband - fixed subscriptions.csv")
carbond <- import("Carbon dioxide emissions.csv")
debt <- import("Debt - external.csv")
electricity <- import("Electricity - installed generating capacity.csv")
energycon <- import("Energy consumption per capita.csv")
inflationrate <- import("Inflation rate (consumer prices).csv")
publicd <- import("Public debt.csv")
refinedpetro <- import("Refined petroleum products - production.csv")
telefonofixed <- import("Telephones - fixed lines.csv")
telefonomobile <- import("Telephones - mobile cellular.csv")
jovenes <- import("Youth unemployment rate (ages 15-24).csv")

# Names of the objects created above, kept as strings so they can be fetched
# by name later on.
nombres_dataframes <- c(
  "area", "broadband", "carbond", "debt",
  "electricity", "energycon", "inflationrate", "publicd",
  "refinedpetro", "telefonofixed", "telefonomobile", "jovenes"
)

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Give the generically named measurement columns distinct names, so each one
# stays identifiable after the tables are merged.
debt <- rename(debt, valor = value)
jovenes <- rename(jovenes, edadesprom = "%")
telefonofixed <- rename(telefonofixed, valorfixed = value)
telefonomobile <- rename(telefonomobile, valormobile = value)
area <- rename(area, sq_km = "sq km")

# Name of the column the tables have in common.
variable_comun <- "name"

# Inspect the column names of the refined petroleum table.
names(refinedpetro)

## [1] "name"                "slug"                "bbl/day"            
## [4] "date_of_information" "ranking"             "region"

# Fetch the imported tables by name into a single named list.
lista_dataframes <- mget(nombres_dataframes)

# From each table keep only the "name" column and its measurement column.
areavar <- area[c("name", "sq_km")]
broadvar <- broadband[c("name", "value")]
carbvar <- carbond[c("name", "metric tonnes of CO2")]
debtvar <- debt[c("name", "valor")]
electvar <- electricity[c("name", "kW")]
enervar <- energycon[c("name", "Btu/person")]
inflavar <- inflationrate[c("name", "%")]
jovevar <- jovenes[c("name", "edadesprom")]
pubvar <- publicd[c("name", "% of GDP")]
refivar <- refinedpetro[c("name", "bbl/day")]
fixvar <- telefonofixed[c("name", "valorfixed")]
mobvar <- telefonomobile[c("name", "valormobile")]

# The two-column tables, in the order in which they will be merged.
data_frames <- list(
  areavar, broadvar, carbvar, debtvar,
  electvar, enervar, inflavar, jovevar,
  pubvar, refivar, fixvar, mobvar
)

# Full outer join of all the tables on "name". Reduce() folds merge() over the
# list from left to right, which performs the same sequence of merges as an
# index loop but also works when the list holds a single table (an index loop
# over 2:length(data_frames) would then run past the end of the list).
combined_base <- Reduce(
  function(acumulado, siguiente) {
    merge(acumulado, siguiente, by = "name", all = TRUE)
  },
  data_frames
)

# Keep only the rows that have a value in every column.
base_limpia <- na.omit(combined_base)

# Show the structure of the cleaned table.
str(base_limpia)

## 'data.frame':    180 obs. of  13 variables:
##  $ name                : chr  "Afghanistan" "Albania" "Algeria" "Angola" ...
##  $ sq_km               : chr  "652,230" "28,748" "2,381,740" "1,246,700" ...
##  $ value               : chr  "26,570" "508,937" "3,790,459" "230,610" ...
##  $ metric tonnes of CO2: chr  "7,893,000" "3,794,000" "151,633,000" "19,362,000" ...
##  $ valor               : chr  "$284,000,000" "$9,311,000,000" "$5,574,000,000" "$42,080,000,000" ...
##  $ kW                  : chr  "776,000" "2,531,000" "21,694,000" "7,344,000" ...
##  $ Btu/person          : chr  "3,227,000" "38,442,000" "61,433,000" "11,693,000" ...
##  $ %                   : chr  "2.3" "6.73" "9.27" "25.75" ...
##  $ edadesprom          : num  20.2 27.8 31.9 18.5 29.9 36.1 10.8 11.4 16.5 30.8 ...
##  $ % of GDP            : num  7 82.4 27.5 65 57.6 ...
##  $ bbl/day             : chr  "0" "5,638" "627,900" "53,480" ...
##  $ valorfixed          : chr  "146,000" "177,000" "5,576,000" "94,000" ...
##  $ valormobile         : chr  "22,678,000" "2,782,000" "49,019,000" "23,978,000" ...
##  - attr(*, "na.action")= 'omit' Named int [1:76] 2 5 6 8 9 10 13 14 31 33 ...
##   ..- attr(*, "names")= chr [1:76] "2" "5" "6" "8" ...

# Strip the thousands separators from the text column "value" and store the
# result as a new numeric column.
base_limpia[["valuenumer"]] <- as.numeric(
  gsub(",", "", base_limpia[["value"]])
)

# Report whether the conversion left any NA in the new numeric column.
print(
  if (anyNA(base_limpia[["valuenumer"]])) {
    "¡Hay valores NA en la variable numérica después de la conversión!"
  } else {
    "La conversión fue exitosa."
  }
)

## [1] "La conversión fue exitosa."

# Strip the thousands separators from the text column "sq_km" and store the
# result as a new numeric column.
base_limpia[["sq_kmnum"]] <- as.numeric(
  gsub(",", "", base_limpia[["sq_km"]])
)

# Report whether the conversion left any NA in the new numeric column.
print(
  if (anyNA(base_limpia[["sq_kmnum"]])) {
    "¡Hay valores NA en la variable numérica después de la conversión!"
  } else {
    "La conversión fue exitosa."
  }
)

## [1] "La conversión fue exitosa."

# Shorten the CO2 column name so it is easier to reference.
base_limpia <- base_limpia %>% rename(CO2 = "metric tonnes of CO2")

# Strip the thousands separators and convert the text to numeric.
base_limpia$co2num <- as.numeric(gsub(",", "", base_limpia$CO2))

# Check the converted column for NA values. The check has to look at co2num:
# the source column CO2 is text that already went through na.omit(), so
# testing it would report success even when the conversion had failed.
if (any(is.na(base_limpia$co2num))) {
  print("¡Hay valores NA en la variable numérica después de la conversión!")
} else {
  print("La conversión fue exitosa.")
}

## [1] "La conversión fue exitosa."

# Strip the dollar signs and thousands separators from the text column
# "valor" and store the result as a new numeric column.
base_limpia[["valornum"]] <- as.numeric(
  gsub("\\$|,", "", base_limpia[["valor"]])
)

# List the column names, including the numeric columns added above.
names(base_limpia)

##  [1] "name"        "sq_km"       "value"       "CO2"         "valor"      
##  [6] "kW"          "Btu/person"  "%"           "edadesprom"  "% of GDP"   
## [11] "bbl/day"     "valorfixed"  "valormobile" "valuenumer"  "sq_kmnum"   
## [16] "co2num"      "valornum"

# Untitled

# Shamanta

# 2024-06-08