Importing/exporting data bases

1. Csv data bases

Package {readr}.

For importing and exporting csv.

# install.packages("readr")
library("readr")
df_csv <- read_csv("data/cepalstat.csv") # read_csv2() when separator is ";" and read_delim() in general
dim(df_csv)

## [1] 13  5

head(df_csv)

## # A tibble: 6 × 5
##   pais      poblacion24 gini20   pib18 mort_igme22
##   <chr>           <dbl>  <dbl>   <dbl>       <dbl>
## 1 Argentina      45696. NA      543356         8.4
## 2 Bolivia        12413.  0.449   40288        19.6
## 3 Brasil        211999.  0.519 1924157        12.5
## 4 Chile          19765.  0.488  295907         5.4
## 5 Colombia       52886.  0.555  334254        10.6
## 6 Ecuador        18136.  0.466  107479        10.5

write_csv(df_csv, "data/cepalstat.csv")

2. Dta data bases (stata)

Package {haven}.

For importing and exporting dta (stata).

# install.packages("haven")
library("haven")
df_dta <- read_dta("data/cepalstat.dta")
dim(df_dta)

## [1] 13  5

head(df_dta)

## # A tibble: 6 × 5
##   pais      poblacion24 gini20   pib18 mort_igme22
##   <chr>           <dbl>  <dbl>   <dbl>       <dbl>
## 1 Argentina      45696. NA      543356         8.4
## 2 Bolivia        12413.  0.449   40288        19.6
## 3 Brasil        211999.  0.519 1924157        12.5
## 4 Chile          19765.  0.488  295907         5.4
## 5 Colombia       52886.  0.555  334254        10.6
## 6 Ecuador        18136.  0.466  107479        10.5

write_dta(df_dta, "data/cepalstat.dta")

3. Excel data bases

Package {openxlsx}.

For importing and exporting xlsx.

# install.packages("openxlsx")
library("openxlsx")
df_xlsx <- read.xlsx("data/cepalstat.xlsx", sheet = "Sheet 1")
dim(df_xlsx)

## [1] 13  5

head(df_xlsx)

##        pais poblacion24 gini20   pib18 mort_igme22
## 1 Argentina     45696.2     NA  543356         8.4
## 2   Bolivia     12413.3  0.449   40288        19.6
## 3    Brasil    211998.6  0.519 1924157        12.5
## 4     Chile     19764.8  0.488  295907         5.4
## 5  Colombia     52886.4  0.555  334254        10.6
## 6   Ecuador     18135.5  0.466  107479        10.5

write.xlsx(df_xlsx, "data/cepalstat.xlsx", sheetName = "Sheet 1")

Package {readxl}.

For importing xls and xlsx.

# install.packages("readxl")
library("readxl")
df_xls <- read_excel("data/cepalstat.xls", sheet = "cepalstat")
dim(df_xls)

## [1] 13  5

head(df_xls)

## # A tibble: 6 × 5
##   pais      poblacion24 gini20   pib18 mort_igme22
##   <chr>           <dbl>  <dbl>   <dbl>       <dbl>
## 1 Argentina      45696. NA      543356         8.4
## 2 Bolivia        12413.  0.449   40288        19.6
## 3 Brasil        211999.  0.519 1924157        12.5
## 4 Chile          19765.  0.488  295907         5.4
## 5 Colombia       52886.  0.555  334254        10.6
## 6 Ecuador        18136.  0.466  107479        10.5

4. Exporting tables and plots

First table. Countries’ gdp per capita and gini coefficient.

df_csv$pib18pc <- df_csv$pib18/df_csv$poblacion24

table_export1 <- subset(df_csv, select = c("pais", "pib18pc", "gini20"))
table_export1 <- table_export1[order(table_export1$pib18pc, decreasing = TRUE), ]
dim(table_export1)

## [1] 13  3

Second table. Countries’ gdp pc, gini and mortality index, selection of those with gini higher than 0.4.

table_export2 <- subset(df_csv, gini20 > 0.4)
table_export2 <- subset(table_export2, select = c("pais", "pib18pc", "gini20", "mort_igme22"))
table_export2 <- table_export2[order(table_export2$pib18pc, decreasing = TRUE), ]
dim(table_export2)

## [1] 7 4

Exporting tables 1 and 2 to excel.

list_of_tables <- list("Select var" = table_export1,
                     "Select countries" = table_export2)

write.xlsx(list_of_tables, "data/Tables report.xlsx")

Plotting.

barplot(table_export2$pib18pc, names.arg = table_export2$pais,
        main = "Total GDP per capita at current prices (2018).",
        sub = "Selection of south american countries with gini coef. higher than 40%",
        xlab = "Countries",
        ylab = "Millions of dollars",
        col = "cadetblue4")

Exporting bar plot.

jpeg("figures/graph_gdp.jpg", width = 600, height = 450, quality = 100)
barplot(table_export2$pib18pc, names.arg = table_export2$pais,
        main = "Total GDP per capita at current prices (2018).",
        sub = "Selection of south american countries with gini coef. higher than 40%",
        xlab = "Countries",
        ylab = "Millions of dollars",
        col = "cadetblue4")
dev.off()

## png 
##   2