Introduction

Below is an example of how you can do some calculations in R and build tables in RMarkdown. The code can be fiddly at first. However, the good thing is if you notice an error in your data and need to reload it or perhaps you add more participants, you just run the same code with the updated data. This saves so much time (yay, no copying and pasting… I’m guilty of doing this a lot in my past life).

Load Packages

If you haven’t previously used the below packages, they can be installed onto your computer using install.packages("packagename"). Thereafter, you only need to load the packages into your R session using library(packagename), as done below.

library(tidyverse)
library(kableExtra)
library(ggpubr)
library(ggstance)
library(broom)
library(lme4)

References

My go-to reference for formatting tables can be found at this link. You may also use Google to search for further details/modifications.

I will use two examples using datasets found within R:

a general summary table with counts, mean, standard deviation, test statistics and p-values
a model summary table with model output

Example 1: General Summary Tables

The first example will use the R dataset mtcars. First, we’ll assign the data to an object and check structure of the data frame.

data <- mtcars 

head(data)

##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

str(data)

## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

Brief Data Preparation and Exploration

Before summarising the data for the table, it’s good to check the variables formats and distributions.

# Factorise cylinder variable
data$cyl <- factor(data$cyl, ordered = TRUE)
data$am <- factor(data$am, labels = c("Automatic", "Manual"))

# Check Counts of Transmission by Cylinder
table(data$cyl, data$am)

##    
##     Automatic Manual
##   4         3      8
##   6         4      3
##   8        12      2

# Create plots for a few variables. The geom_boxploth function comes from the ggstance package
p1 <- ggplot(data, aes(x = mpg, y = -0.5)) +
  geom_boxploth(aes(fill = cyl)) +
  geom_density(aes(x = mpg), inherit.aes = FALSE) +
  facet_grid(cyl ~ .) + scale_fill_discrete() +
  theme_bw() +
  xlab("Miles/Gallon") + ylab(" ") + labs(fill = "Cylinder") +
  ylim(-1, 1.1) + 
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom"
    )

p2 <- ggplot(data, aes(x = wt, y = -0.5)) +
  geom_boxploth(aes(fill = cyl)) +
  geom_density(aes(x = wt), inherit.aes = FALSE) +
  facet_grid(cyl ~ .) + scale_fill_discrete() +
  theme_bw() +
  xlab("Weight (1000 ibs)") + ylab(" ") + labs(fill = "Cylinder") +
  ylim(-1, 1.1) + 
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom"
    )

p3 <- ggplot(data, aes(x = hp, y = -0.5)) +
  geom_boxploth(aes(fill = cyl)) +
  geom_density(aes(x = hp), inherit.aes = FALSE) +
  facet_grid(cyl ~ .) + scale_fill_discrete() +
  theme_bw() +
  xlab("Gross Horsepower") + ylab(" ") + labs(fill = "Cylinder") +
  ylim(-1, 1.1) + 
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom"
    )


# The ggarrane function pastes the plots together for your output, from the ggpubr packer
ggarrange(
  p1, p2, p3,
  ncol = 3,
  common.legend = TRUE, legend="bottom"
)

Summarise Data for Table

Next, we’re going to summarise the data for our table.

# Overall counts and percentages for each cylinder 
summCounts <- as.data.frame(table(data$cyl))
summCounts$p <- round(summCounts$Freq/sum(summCounts$Freq)*100,1) 


# Mean/SD - mpg
summMean <- data %>%
  select(cyl, mpg) %>%
  group_by(cyl) %>%
  summarise_each(funs(mean(., na.rm=TRUE))) %>% 
  as.data.frame() %>%
  mutate_at(-1, round, 2)
  
summSD <- data %>%
  select(cyl, mpg) %>%
  group_by(cyl) %>%
  summarise_each(funs(sd(., na.rm=TRUE))) %>% 
  as.data.frame() %>%
  mutate_at(-1, round, 2)

# Median/MAD - Car weight and horsepower
summMedian <- data %>%
  select(cyl, wt, hp) %>%
  group_by(cyl) %>%
  summarise_each(funs(median(., na.rm=TRUE))) %>% 
  as.data.frame() %>%
  mutate_at(-1, round, 2)
  
summMAD <- data %>%
  select(cyl, wt, hp) %>%
  group_by(cyl) %>%
  summarise_each(funs(mad(., na.rm=TRUE))) %>% 
  as.data.frame() %>%
  mutate_at(-1, round, 2)


# Counts/% - Transmission
summTrans <- as.data.frame(table(data$am, data$cyl))
summTrans$p <- c(round(summTrans$Freq[1:2]/sum(summTrans$Freq[1:2])*100,1),
                 round(summTrans$Freq[3:4]/sum(summTrans$Freq[3:4])*100,1),
                 round(summTrans$Freq[5:6]/sum(summTrans$Freq[5:6])*100,1))

Perform some statistical tests for the table.

summTests <- data.frame(Stat = c(round(tidy(aov(data$mpg ~ data$cyl))$statistic[1], 3),
                                 round(tidy(aov(data$wt ~ data$cyl))$statistic[1], 3),
                                 round(tidy(aov(data$hp ~ data$cyl))$statistic[1], 3),
                                 round(chisq.test(data$cyl, data$am)$statistic, 3)),
                        Pvals = c(signif(tidy(aov(data$mpg ~ data$cyl))$p.value[1], 3),
                                   signif(tidy(aov(data$wt ~ data$cyl))$p.value[1], 3),
                                   signif(tidy(aov(data$hp ~ data$cyl))$p.value[1], 3),
                                   signif(chisq.test(data$cyl, data$am)$p.value, 3)),
                        stringsAsFactors = FALSE)

Build Table

Now we need to build a table and save it as a data frame. It is important to add space where needed in the table (i.e. when adding the test statistic and p-value for a categorical variable). Any formatting (bold, italics) may be done while building the table, as I have, but you may also parse argumaents later as KableExtra arguments.

t1 <- data.frame(var = c(paste0("**Count**", "  ", "*n (%)*"),
                         "",
                         paste0("**Miles/Gallon**", "  ", "*Mean (SD)*"),
                         paste0("**Car Weight (1000 lbs)**", "  ", "*Median (MAD)*"),
                         paste0("**Horsepower**", "  ", "*Median (MAD)*"),
                         paste0("**Transmission**", "  ", "*n (%)*"),
                         "*Automatic*", "*Manual*"),
                 
                 cyl4 = c(paste0(summCounts$Freq[1], " (", summCounts$p[1], "%)"),
                          "",
                          paste0(summMean$mpg[1], " (", summSD$mpg[1], ")"),
                          paste0(summMedian[1,2:3], " (", summMAD[1,2:3], ")"),
                          "",
                          paste0(summTrans$Freq[1:2], " (", summTrans$p[1:2], "%)")),
                 
                 cyl6 = c(paste0(summCounts$Freq[2], " (", summCounts$p[2], "%)"),
                          "",
                          paste0(summMean$mpg[2], " (", summSD$mpg[2], ")"),
                          paste0(summMedian[2,2:3], " (", summMAD[2,2:3], ")"),
                          "",
                          paste0(summTrans$Freq[3:4], " (", summTrans$p[3:4], "%)")),
                 
                 cyl8 = c(paste0(summCounts$Freq[3], " (", summCounts$p[3], "%)"),
                          "",
                          paste0(summMean$mpg[3], " (", summSD$mpg[3], ")"),
                          paste0(summMedian[3,2:3], " (", summMAD[3,2:3], ")"),
                          "",
                          paste0(summTrans$Freq[5:6], " (", summTrans$p[5:6], "%)")),
                 
                 stat = c("", "", 
                          paste0(summTests$Stat[1:3], footnote_marker_number(1)), "",  #add footnote for diff tests
                          paste0(summTests$Stat[4], footnote_marker_number(2)), ""),
                 
                 pval = c("", "", summTests$Pvals[1:3], "", summTests$Pvals[4], ""),
                 
                 stringsAsFactors = FALSE)

# Print table to check format, which will help with parsing to the table function
t1

##                                         var         cyl4         cyl6
## 1                        **Count**  *n (%)*   11 (34.4%)    7 (21.9%)
## 2                                                                    
## 3             **Miles/Gallon**  *Mean (SD)* 26.66 (4.51) 19.74 (1.45)
## 4 **Car Weight (1000 lbs)**  *Median (MAD)*   2.2 (0.54)  3.21 (0.36)
## 5            **Horsepower**  *Median (MAD)*   91 (32.62)   110 (7.41)
## 6                 **Transmission**  *n (%)*                          
## 7                               *Automatic*    3 (27.3%)    4 (57.1%)
## 8                                  *Manual*    8 (72.7%)    3 (42.9%)
##            cyl8               stat     pval
## 1    14 (43.8%)                            
## 2                                          
## 3   15.1 (2.56) 39.698<sup>1</sup> 4.98e-09
## 4   3.75 (0.41) 22.911<sup>1</sup> 1.07e-06
## 5 192.5 (44.48) 36.177<sup>1</sup> 1.32e-08
## 6                                          
## 7    12 (85.7%)  8.741<sup>2</sup>   0.0126
## 8     2 (14.3%)

Lastly, you parse your table data frame into the Kable and KableExtra functions. You may use the reference link above to edit the formatting.

# HTML Table Function
kable(t1,
      caption = "**Summary Table of mtcars Dataset**",
      col.names = c("", "4 Cylinders", "6 Cylinders", "8 Cylinders", "Test", "P-Value"),
      align="lccccc", escape = FALSE) %>%
  
  # KableExtra Arguments
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  add_indent(c(7,8)) %>%
  footnote(number = c("ANOVA F Statistic", "Chi-Square Test"),
           general_title = "",
           footnote_as_chunk = T)

**Summary Table of mtcars Dataset**
	4 Cylinders	6 Cylinders	8 Cylinders	Test	P-Value
Count n (%)	11 (34.4%)	7 (21.9%)	14 (43.8%)

Miles/Gallon Mean (SD)	26.66 (4.51)	19.74 (1.45)	15.1 (2.56)	39.698¹	4.98e-09
Car Weight (1000 lbs) Median (MAD)	2.2 (0.54)	3.21 (0.36)	3.75 (0.41)	22.911¹	1.07e-06
Horsepower Median (MAD)	91 (32.62)	110 (7.41)	192.5 (44.48)	36.177¹	1.32e-08
Transmission n (%)
Automatic	3 (27.3%)	4 (57.1%)	12 (85.7%)	8.741²	0.0126
Manual	8 (72.7%)	3 (42.9%)	2 (14.3%)
¹ ANOVA F Statistic ² Chi-Square Test

Example 2: Model Summary Tables

The second example will use the R dataset ChickWeight. First, we’ll assign the data to an object and check the structure of the data frame.

data <- ChickWeight 

head(data)

## Grouped Data: weight ~ Time | Chick
##   weight Time Chick Diet
## 1     42    0     1    1
## 2     51    2     1    1
## 3     59    4     1    1
## 4     64    6     1    1
## 5     76    8     1    1
## 6     93   10     1    1

str(data)

## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame':   578 obs. of  4 variables:
##  $ weight: num  42 51 59 64 76 93 106 125 149 171 ...
##  $ Time  : num  0 2 4 6 8 10 12 14 16 18 ...
##  $ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
##  $ Diet  : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "formula")=Class 'formula'  language weight ~ Time | Chick
##   .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
##  - attr(*, "outer")=Class 'formula'  language ~Diet
##   .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
##  - attr(*, "labels")=List of 2
##   ..$ x: chr "Time"
##   ..$ y: chr "Body weight"
##  - attr(*, "units")=List of 2
##   ..$ x: chr "(days)"
##   ..$ y: chr "(gm)"

Brief Data Preparation and Exploration

Before running the model, it’s good to check the variables formats and distributions (NOTE: in this case, the Chick and Diet variables were already factored).

# Relabel diet factor
data$Diet <- factor(data$Diet, labels = c("Diet1", "Diet2", "Diet3", "Diet4"))

# Explore Distribution Over Time
ggplot(data, aes(x = Time, y = weight, col=Chick)) + 
  geom_line () + geom_point() + 
  facet_wrap(~Diet) +
  theme_bw() + theme(legend.position = "none") +
  ylab("Weight (g)") + xlab("Age (days)")

Fit Models

We’re going to fit a linear mixed effects model to the data, with fixed effects Diet and Time, and a random intercept and slope for each Chick. We’re also going to fit Diet² since there is evidence of a quadratic relationship. Further, we’ve added a 1st order autocorrelation and the intercept of the main effect variable is also set to 0 (via the -1). This makes sense biologically, but it is also the only way to make a model with a serial autocorrelation converge.

These steps are detailed further at this link.

# Full model
m1 <- lmer(weight ~ Diet * poly(Time,2)-1 + ((Time-1)|Chick), data = ChickWeight)

m1ci <- confint.merMod(m1)

summary(m1)

## Linear mixed model fit by REML ['lmerMod']
## Formula: weight ~ Diet * poly(Time, 2) - 1 + ((Time - 1) | Chick)
##    Data: ChickWeight
## 
## REML criterion at convergence: 4669.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0825 -0.5313  0.0952  0.5013  3.4647 
## 
## Random effects:
##  Groups   Name Variance Std.Dev.
##  Chick    Time   6.607   2.57   
##  Residual      156.113  12.49   
## Number of obs: 578, groups:  Chick, 50
## 
## Fixed effects:
##                      Estimate Std. Error t value
## Diet1                 101.166      6.356  15.917
## Diet2                 120.862      8.786  13.756
## Diet3                 140.579      8.786  16.000
## Diet4                 134.251      8.788  15.276
## poly(Time, 2)1       1032.903     97.999  10.540
## poly(Time, 2)2         70.347     20.593   3.416
## Diet2:poly(Time, 2)1  361.236    166.626   2.168
## Diet3:poly(Time, 2)1  813.581    166.626   4.883
## Diet4:poly(Time, 2)1  519.814    166.756   3.117
## Diet2:poly(Time, 2)2   52.718     34.288   1.537
## Diet3:poly(Time, 2)2  209.527     34.288   6.111
## Diet4:poly(Time, 2)2   -6.275     34.830  -0.180
## 
## Correlation of Fixed Effects:
##             Diet1  Diet2  Diet3  Diet4  p(T,2)1 p(T,2)2 D2:(T,2)1 D3:(T,2)1
## Diet2        0.000                                                         
## Diet3        0.000  0.000                                                  
## Diet4        0.000  0.000  0.000                                           
## poly(Tm,2)1  0.970  0.000  0.000  0.000                                    
## poly(Tm,2)2  0.006  0.000  0.000  0.000  0.018                             
## Dt2:p(T,2)1 -0.570  0.785  0.000  0.000 -0.588  -0.011                     
## Dt3:p(T,2)1 -0.570  0.000  0.785  0.000 -0.588  -0.011   0.346             
## Dt4:p(T,2)1 -0.570  0.000  0.000  0.785 -0.588  -0.011   0.346     0.346   
## Dt2:p(T,2)2 -0.003 -0.001  0.000  0.000 -0.011  -0.601   0.003     0.006   
## Dt3:p(T,2)2 -0.003  0.000 -0.001  0.000 -0.011  -0.601   0.006     0.003   
## Dt4:p(T,2)2 -0.003  0.000  0.000  0.003 -0.011  -0.591   0.006     0.006   
##             D4:(T,2)1 D2:(T,2)2 D3:(T,2)2
## Diet2                                    
## Diet3                                    
## Diet4                                    
## poly(Tm,2)1                              
## poly(Tm,2)2                              
## Dt2:p(T,2)1                              
## Dt3:p(T,2)1                              
## Dt4:p(T,2)1                              
## Dt2:p(T,2)2  0.006                       
## Dt3:p(T,2)2  0.006     0.361             
## Dt4:p(T,2)2  0.009     0.355     0.355

# Null model (to test whether diet is a significant predictor)
m2 <- lmer(weight ~ poly(Time,2)-1 + ((Time-1)|Chick), data = ChickWeight)

m2ci <- confint.merMod(m2)

summary(m2)

## Linear mixed model fit by REML ['lmerMod']
## Formula: weight ~ poly(Time, 2) - 1 + ((Time - 1) | Chick)
##    Data: ChickWeight
## 
## REML criterion at convergence: 4970.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4530 -0.5542  0.0874  0.5518  3.2246 
## 
## Random effects:
##  Groups   Name Variance Std.Dev.
##  Chick    Time 132.6    11.52   
##  Residual      176.7    13.29   
## Number of obs: 578, groups:  Chick, 50
## 
## Fixed effects:
##                Estimate Std. Error t value
## poly(Time, 2)1  -435.89      15.82 -27.547
## poly(Time, 2)2   125.84      13.42   9.376
## 
## Correlation of Fixed Effects:
##             p(T,2)1
## poly(Tm,2)2 0.012

Build Table

Now we need to build a model table and save it as a data frame. This table will include the important information needed to compare the full and null models.

t2 <- data.frame(vars = c("**Main Effects**",
                              "**Diet 1**", "**Diet 2**", "**Diet 3**", "**Diet 4**", "**Time**", "**Time^2^**",
                           "**Interaction Effects**",
                              "**Diet x Time**", 
                                  "*Diet 1*", "*Diet 2*", "*Diet 3*", "*Diet 4*",
                              "**Diet x Time^2^**",
                                  "*Diet 1*", "*Diet 2*", "*Diet 3*", "*Diet 4*",
                           "**Random Effects**",
                              paste0("**Residual**", "  ", "$\\boldsymbol \\sigma$"),
                           "**Model Statistics**",
                              "**Obs**", "**AIC**", "**BIC**"),
                  
                 model1 = c("", 
                            paste0(round(coef(summary(m1))[1:6,1],2), " [", 
                                   round(m1ci[3:8,1],2), ", ", round(m1ci[3:8,2],2), "]"), 
                            "", "", "Ref",
                            paste0(round(coef(summary(m1))[7:9,1],2), " [", 
                                   round(m1ci[9:11,1],2), ", ", round(m1ci[9:11,1],2), "]"), 
                            "", "Ref",
                            paste0(round(coef(summary(m1))[10:12,1],2), " [", 
                                   round(m1ci[12:14,1],2), ", ", round(m1ci[12:14,2],2), "]"),
                            "", 
                            round(summary(m1)$sigma,2),
                            "",
                            summary(m1)$ngrps, round(AIC(m1),2), round(BIC(m1),2)),
                 
                 
                 
                 
                  model2 = c("", "", "", "", "",
                            paste0(round(coef(summary(m2))[1:2,1],2), " [", 
                                   round(m2ci[3:4,1],2), ", ", round(m2ci[3:4,2],2), "]"), 
                            "", "", "", "", "",
                            "", "", "", "", 
                            "",
                            "", "", 
                            round(summary(m2)$sigma,2), 
                            "", 
                            summary(m2)$ngrps, round(AIC(m2),2), round(BIC(m2),2)),
                 
                 stringsAsFactors = FALSE
)

Lastly, you parse your table data frame into the Kable and KableExtra functions.

kable(t2,
      caption = "**Model Summary Table of ChickWeight Dataset**",
      col.names = c("", "Model 1", "Model 2"),
      align="lcc", escape = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  add_indent(c(10:13,15:18)) %>%
  row_spec(0, bold = T, background = "#666666", color = "white") %>%
  row_spec(c(1,8,19,21), bold = T, background = "#CCCCCC")

**Model Summary Table of ChickWeight Dataset**
	Model 1	Model 2
Main Effects
Diet 1	101.17 [88.98, 113.34]
Diet 2	120.86 [104.03, 137.69]
Diet 3	140.58 [123.75, 157.41]
Diet 4	134.25 [117.42, 151.09]
Time	1032.9 [845.1, 1220.6]	-435.89 [-466.9, -404.87]
Time²	70.35 [30.34, 110.6]	125.84 [99.54, 152.14]
Interaction Effects
Diet x Time
Diet 1	Ref
Diet 2	361.24 [42.11, 42.11]
Diet 3	813.58 [494.46, 494.46]
Diet 4	519.81 [200.44, 200.44]
Diet x Time²
Diet 1	Ref
Diet 2	52.72 [-14.22, 119.41]
Diet 3	209.53 [142.59, 276.22]
Diet 4	-6.28 [-74.23, 61.51]
Random Effects
Residual \(\boldsymbol \sigma\)	12.49	13.29
Model Statistics
Obs	50	50
AIC	4697.58	4978.46
BIC	4758.61	4995.9

Creating Tables in RMarkdown

Dr Alyce Russell
Postdoctoral Research Fellow / Co-founder R-Ladies Perth
a.russell@ecu.edu.au

Updated: 06 April 2020

Introduction

Load Packages

References

Example 1: General Summary Tables

Brief Data Preparation and Exploration

Summarise Data for Table

Build Table

Example 2: Model Summary Tables

Brief Data Preparation and Exploration

Fit Models

Build Table

Creating Tables in RMarkdown

Dr Alyce Russell Postdoctoral Research Fellow / Co-founder R-Ladies Perth a.russell@ecu.edu.au

Updated: 06 April 2020

Introduction

Load Packages

References

Example 1: General Summary Tables

Brief Data Preparation and Exploration

Summarise Data for Table

Build Table

Example 2: Model Summary Tables

Brief Data Preparation and Exploration

Fit Models

Build Table

Dr Alyce Russell
Postdoctoral Research Fellow / Co-founder R-Ladies Perth
a.russell@ecu.edu.au