Sending ouput to Excel from R

Introduction

I will review some features available to send output from R (dataframes) to Excel. We may want to do it because we feel more comfortable filtering or sorting data in excel, because we need to provide the output in excel to end users or as an input to other processes. There is not so much material available, compared to reading excel files. The overview is not exhaustive but restricted to some practical examples which we have handled in Regacc.

Packages

There are a few packages available but we will focus on {writexl} and {openxlsx}. {writexls} is the preferred option for exporting un-formatted data as it is faster and creates smaller files. We will use {openxlsx} if we want some customization.

## @Manual{R-openxlsx,
##   title = {openxlsx: Read, Write and Edit xlsx Files},
##   author = {Philipp Schauberger and Alexander Walker},
##   year = {2021},
##   note = {R package version 4.2.5},
##   url = {https://CRAN.R-project.org/package=openxlsx},
## }
## 
## @Manual{R-writexl,
##   title = {writexl: Export Data Frames to Excel xlsx
##     Format},
##   author = {Jeroen Ooms},
##   year = {2021},
##   note = {R package version 1.4.0},
##   url = {https://CRAN.R-project.org/package=writexl},
## }

Simple export

Exporting to excel a dataframe. We will use a csv file of eurobase table nama_10r_2gdp as the starting point of the example.

library(tidyverse)
library(writexl)

df<- rio::import("data/gdp2v.csv") %>% 
  mutate(country = str_sub(geo,1,2), 
       NUTS= as.factor(str_length(geo)-2)) %>% 
  filter(country == "ES" & unit %in% c("EUR_HAB", "PPS_HAB_EU27_2020")) %>% 
  select(country,geo,NUTS,unit,obs_value) 

writexl::write_xlsx(df, "output/gdp2.xlsx")

Creating a temporary file

For day to day use we can create an un-named temporary file. The best is to create a function that we can add to our rstudio snippets and call it when needed instead of typing it every time we need to do it. Taken from https://twitter.com/brodriguesco/status/1447468259725434886

show_in_excel <- function(.data){
  tmp <- paste0(tempfile(), ".xlsx")
  writexl::write_xlsx(.data,tmp)
  browseURL(tmp)
}

show_in_excel(df)

Exporting as an excel table

If we want to avoid having to manually filter the first row in Excel after opening the file for sorting/filtering we can export directly the dataframe as an Excel table.

library(openxlsx)

write.xlsx(df, "output/gdp2.xlsx", overwrite = TRUE, asTable = TRUE)

Further customisation options

If we want to customise more the output, add a name to the worksheet, table style, font, number format, colours, etc. there are endless possibilities (https://cran.r-project.org/web/packages/openxlsx/vignettes/Formatting.html).

wb <- createWorkbook() 
    modifyBaseFont(wb, fontSize = 12, fontName = "Calibri Light")
    options(openxlsx.numFmt = "#,##0.00")
    addWorksheet(wb, "GDP_ES")
    writeDataTable(wb, "GDP_ES", df, tableStyle = "TableStyleMedium13")
    
saveWorkbook(wb,"output/gdp2.xlsx", overwrite = TRUE)

Modifying an existing file

We can open an existing file and further specify where to place the data, insert plots, etc.

Here we calculate the mean by region and create a chart showing the mean as a bar and the observations as points. We will place the table with the mean in a worksheet called Summary on the third row and column and the chart in a Woeksheet named Chart with some defined height and width.

temp<- df %>% 
  group_by(geo,unit,NUTS) %>% 
  summarise(avg = round(mean(obs_value, na.rm = TRUE)))

p<- ggplot()+
  geom_col(data=temp %>% filter(NUTS!=1),aes(avg, reorder(geo,avg), fill= NUTS))+
  geom_point(data=df%>% filter(NUTS!=1), aes(obs_value,geo))+
  facet_wrap(~unit, scales = "free_x")+
  scale_x_continuous(scales::pretty_breaks(n=4), labels = scales::label_number())+
  ggthemes::theme_fivethirtyeight()+
  ggthemes::scale_fill_tableau()+
  theme(legend.position = "none")

wb <- loadWorkbook("output/gdp2.xlsx")
addWorksheet(wb, "Summary")
writeDataTable(wb, sheet="Summary", temp, startCol = 3, startRow = 3,tableStyle = "TableStyleLight13")
 print(p)

 addWorksheet(wb, "Chart")
wb %>% insertPlot(sheet="Chart",startCol =1, startRow=1,width=12,height = 9, dpi=600)
saveWorkbook(wb, "output/gdp2_new.xlsx", overwrite = TRUE)

Split the dataframe and write each group to a different worksheet

Sometimes we may want to split a big dataframe into several worksheets using the groups of a specific column. This is helpful to organise the output and avoid the number of rows limit of 1.000.000.

df_unit <- split(df,df$unit)
write.xlsx(df_unit, file = "output/gdp2_unit.xlsx", overwrite = TRUE)

Creating a custom function

We can create a custom function, write_countries with a single argument (x which is the country) that would create the same file. We just need to add a filter to the dataset with the variable that also will be used for naming the files and worksheets. We do that using the base function paste0.

write_countries <- function(x){
  
  temp<-df %>% 
    filter(country == x)
  
  summ<- temp %>%
  group_by(geo,unit,NUTS) %>% 
  summarise(avg = round(mean(obs_value, na.rm = TRUE)))

p<- ggplot()+
  geom_col(data=summ %>% filter(NUTS!=1),aes(avg, reorder(geo,avg), fill= NUTS))+
  geom_point(data=temp %>% filter(NUTS!=1), aes(obs_value,geo))+
  facet_wrap(~unit, scales = "free_x")+
  scale_x_continuous(scales::pretty_breaks(n=4), labels = scales::label_number())+
  ggthemes::theme_fivethirtyeight()+
  ggthemes::scale_fill_tableau()+
  theme(legend.position = "none")

  wb <- createWorkbook() 
    modifyBaseFont(wb, fontSize = 12, fontName = "Calibri Light")
    options(openxlsx.numFmt = "#,##0.00")
    addWorksheet(wb, paste0("GDP_",x))
    writeDataTable(wb, paste0("GDP_",x), temp, tableStyle = "TableStyleMedium13")
    addWorksheet(wb, "Summary")
    writeDataTable(wb, sheet="Summary", summ, startCol = 3, startRow = 3,tableStyle = "TableStyleLight13")
     print(p)
 addWorksheet(wb, "Chart")
wb %>% insertPlot(sheet="Chart",startCol =1, startRow=1,width=12,height = 9, dpi=600)

saveWorkbook(wb,paste0("output/",x,"_gdp2.xlsx"), overwrite = TRUE)}
write_countries("ES")

Iterating the creation of files

Finally, we can automatise the creation of the files by creating a list of countries and applying the function to each element of the list. The use the function walk from the package {purrr} to do that.

df<- rio::import("data/gdp2v.csv") %>% 
  mutate(country = str_sub(geo,1,2), 
       NUTS= as.factor(str_length(geo)-2)) %>% 
  select(country,geo,NUTS,unit,obs_value) %>% 
  filter(unit %in% c("EUR_HAB", "PPS_HAB_EU27_2020"))

list_countries<- c("NL","SK","BE", "AT", "CZ")

walk(list_countries, write_countries)

---
title: "Sending ouput to Excel from R"
date: "January 2022"
author: Luis Biedma
output:
  rmdformats::readthedown:
    highlight: kate
    code_download: true
---
```{r setup, include = FALSE}
## Global options
knitr::opts_chunk$set(
	message = FALSE,
	warning = FALSE,
	cache = FALSE,
	fig.height=9, 
	fig.width=12
)
```

## Introduction 

I will review some features available to send output from R (dataframes) to Excel. We may want to do it because we feel more comfortable filtering or sorting data in excel, because we need to provide the output in excel to end users or as an input to other processes. There is not so much material available, compared to reading excel files. The overview is not exhaustive but restricted to some practical examples which we have handled in Regacc.

## Packages

There are a few packages available but we will  focus on `{writexl}` and `{openxlsx}`. ` {writexls}` is the preferred option for exporting un-formatted data as it is faster and creates smaller files. We will use `{openxlsx}` if we want some customization.

```{r echo=FALSE}
knitr::write_bib(c("writexl", "openxlsx"), width = 60)
```


## Simple export

Exporting to excel a dataframe. We will use a csv file of eurobase table nama_10r_2gdp as the starting point of the example.

```{r}
library(tidyverse)
library(writexl)

df<- rio::import("data/gdp2v.csv") %>% 
  mutate(country = str_sub(geo,1,2), 
       NUTS= as.factor(str_length(geo)-2)) %>% 
  filter(country == "ES" & unit %in% c("EUR_HAB", "PPS_HAB_EU27_2020")) %>% 
  select(country,geo,NUTS,unit,obs_value) 

writexl::write_xlsx(df, "output/gdp2.xlsx")
```

## Creating a temporary file

For day to day use we can create an un-named temporary file. The best is to create a function that we can add to our rstudio snippets and call it when needed instead of typing it every time we need to do it. Taken from <https://twitter.com/brodriguesco/status/1447468259725434886>

```{r}
show_in_excel <- function(.data){
  tmp <- paste0(tempfile(), ".xlsx")
  writexl::write_xlsx(.data,tmp)
  browseURL(tmp)
}

show_in_excel(df)
```

## Exporting as an excel table

If we want to avoid having to manually filter the first row in Excel after opening the file for sorting/filtering we can export directly the dataframe as an Excel table.

```{r}
library(openxlsx)

write.xlsx(df, "output/gdp2.xlsx", overwrite = TRUE, asTable = TRUE)
```

## Further customisation options

If we want to customise more the output, add a name to the worksheet, table style, font, number format, colours, etc. there are endless possibilities (<https://cran.r-project.org/web/packages/openxlsx/vignettes/Formatting.html>).

```{r}
wb <- createWorkbook() 
    modifyBaseFont(wb, fontSize = 12, fontName = "Calibri Light")
    options(openxlsx.numFmt = "#,##0.00")
    addWorksheet(wb, "GDP_ES")
    writeDataTable(wb, "GDP_ES", df, tableStyle = "TableStyleMedium13")
    
saveWorkbook(wb,"output/gdp2.xlsx", overwrite = TRUE)
```

## Modifying an existing file

We can open an existing file and further specify where to place the data, insert plots, etc.

Here we calculate the mean by region and create a chart showing the mean as a bar and the observations as points. We will place the table with the mean in a worksheet called *Summary* on the third row and column and the chart in a Woeksheet named *Chart* with some defined height and width. 

```{r}
temp<- df %>% 
  group_by(geo,unit,NUTS) %>% 
  summarise(avg = round(mean(obs_value, na.rm = TRUE)))

p<- ggplot()+
  geom_col(data=temp %>% filter(NUTS!=1),aes(avg, reorder(geo,avg), fill= NUTS))+
  geom_point(data=df%>% filter(NUTS!=1), aes(obs_value,geo))+
  facet_wrap(~unit, scales = "free_x")+
  scale_x_continuous(scales::pretty_breaks(n=4), labels = scales::label_number())+
  ggthemes::theme_fivethirtyeight()+
  ggthemes::scale_fill_tableau()+
  theme(legend.position = "none")

wb <- loadWorkbook("output/gdp2.xlsx")
addWorksheet(wb, "Summary")
writeDataTable(wb, sheet="Summary", temp, startCol = 3, startRow = 3,tableStyle = "TableStyleLight13")
 print(p)
 addWorksheet(wb, "Chart")
wb %>% insertPlot(sheet="Chart",startCol =1, startRow=1,width=12,height = 9, dpi=600)
saveWorkbook(wb, "output/gdp2_new.xlsx", overwrite = TRUE)
```

## Split the dataframe and write each group to a different worksheet

Sometimes we may want to split a big dataframe into several worksheets using the groups of a specific column. This is helpful to organise the output and avoid the number of rows limit of 1.000.000.

```{r}
df_unit <- split(df,df$unit)
write.xlsx(df_unit, file = "output/gdp2_unit.xlsx", overwrite = TRUE)
```

## Creating a custom function

We can create a custom function, `write_countries`  with a single argument (`x` which is the country) that would create the same file. We just need to add a filter to the dataset with the variable that also  will be used for naming the files and worksheets. We do that using the base function `paste0`.

```{r}
write_countries <- function(x){
  
  temp<-df %>% 
    filter(country == x)
  
  summ<- temp %>%
  group_by(geo,unit,NUTS) %>% 
  summarise(avg = round(mean(obs_value, na.rm = TRUE)))

p<- ggplot()+
  geom_col(data=summ %>% filter(NUTS!=1),aes(avg, reorder(geo,avg), fill= NUTS))+
  geom_point(data=temp %>% filter(NUTS!=1), aes(obs_value,geo))+
  facet_wrap(~unit, scales = "free_x")+
  scale_x_continuous(scales::pretty_breaks(n=4), labels = scales::label_number())+
  ggthemes::theme_fivethirtyeight()+
  ggthemes::scale_fill_tableau()+
  theme(legend.position = "none")

  wb <- createWorkbook() 
    modifyBaseFont(wb, fontSize = 12, fontName = "Calibri Light")
    options(openxlsx.numFmt = "#,##0.00")
    addWorksheet(wb, paste0("GDP_",x))
    writeDataTable(wb, paste0("GDP_",x), temp, tableStyle = "TableStyleMedium13")
    addWorksheet(wb, "Summary")
    writeDataTable(wb, sheet="Summary", summ, startCol = 3, startRow = 3,tableStyle = "TableStyleLight13")
     print(p)
 addWorksheet(wb, "Chart")
wb %>% insertPlot(sheet="Chart",startCol =1, startRow=1,width=12,height = 9, dpi=600)

saveWorkbook(wb,paste0("output/",x,"_gdp2.xlsx"), overwrite = TRUE)}

```


```{r}

write_countries("ES")
```


## Iterating the creation of files 

Finally, we can automatise the creation of the files by creating a list of countries and applying the function to each element of the list. The use the function `walk` from the package `{purrr}` to do that.

```{r}

df<- rio::import("data/gdp2v.csv") %>% 
  mutate(country = str_sub(geo,1,2), 
       NUTS= as.factor(str_length(geo)-2)) %>% 
  select(country,geo,NUTS,unit,obs_value) %>% 
  filter(unit %in% c("EUR_HAB", "PPS_HAB_EU27_2020"))

list_countries<- c("NL","SK","BE", "AT", "CZ")

walk(list_countries, write_countries)
```
