Introduction


Below is an example of how you can do some calculations in R and build tables in RMarkdown. The code can be fiddly at first. However, the good thing is if you notice an error in your data and need to reload it or perhaps you add more participants, you just run the same code with the updated data. This saves so much time (yay, no copying and pasting… I’m guilty of doing this a lot in my past life).


Load Packages


If you haven’t previously used the below packages, they can be installed onto your computer using install.packages("packagename"). Thereafter, you only need to load the packages into your R session using library(packagename), as done below.


library(tidyverse)
library(kableExtra)
library(ggpubr)
library(ggstance)
library(broom)
library(lme4)


References


My go-to reference for formatting tables can be found at this link. You may also use Google to search for further details/modifications.


I will use two examples using datasets found within R:

  • a general summary table with counts, mean, standard deviation, test statistics and p-values

  • a model summary table with model output



Example 1: General Summary Tables


The first example will use the R dataset mtcars. First, we’ll assign the data to an object and check structure of the data frame.


data <- mtcars 

head(data)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
str(data)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...


Brief Data Preparation and Exploration


Before summarising the data for the table, it’s good to check the variables formats and distributions.


# Factorise cylinder variable
data$cyl <- factor(data$cyl, ordered = TRUE)
data$am <- factor(data$am, labels = c("Automatic", "Manual"))

# Check Counts of Transmission by Cylinder
table(data$cyl, data$am)
##    
##     Automatic Manual
##   4         3      8
##   6         4      3
##   8        12      2


# Create plots for a few variables. The geom_boxploth function comes from the ggstance package
p1 <- ggplot(data, aes(x = mpg, y = -0.5)) +
  geom_boxploth(aes(fill = cyl)) +
  geom_density(aes(x = mpg), inherit.aes = FALSE) +
  facet_grid(cyl ~ .) + scale_fill_discrete() +
  theme_bw() +
  xlab("Miles/Gallon") + ylab(" ") + labs(fill = "Cylinder") +
  ylim(-1, 1.1) + 
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom"
    )

p2 <- ggplot(data, aes(x = wt, y = -0.5)) +
  geom_boxploth(aes(fill = cyl)) +
  geom_density(aes(x = wt), inherit.aes = FALSE) +
  facet_grid(cyl ~ .) + scale_fill_discrete() +
  theme_bw() +
  xlab("Weight (1000 ibs)") + ylab(" ") + labs(fill = "Cylinder") +
  ylim(-1, 1.1) + 
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom"
    )

p3 <- ggplot(data, aes(x = hp, y = -0.5)) +
  geom_boxploth(aes(fill = cyl)) +
  geom_density(aes(x = hp), inherit.aes = FALSE) +
  facet_grid(cyl ~ .) + scale_fill_discrete() +
  theme_bw() +
  xlab("Gross Horsepower") + ylab(" ") + labs(fill = "Cylinder") +
  ylim(-1, 1.1) + 
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom"
    )


# The ggarrane function pastes the plots together for your output, from the ggpubr packer
ggarrange(
  p1, p2, p3,
  ncol = 3,
  common.legend = TRUE, legend="bottom"
)


Summarise Data for Table


Next, we’re going to summarise the data for our table.


# Overall counts and percentages for each cylinder 
summCounts <- as.data.frame(table(data$cyl))
summCounts$p <- round(summCounts$Freq/sum(summCounts$Freq)*100,1) 


# Mean/SD - mpg
summMean <- data %>%
  select(cyl, mpg) %>%
  group_by(cyl) %>%
  summarise_each(funs(mean(., na.rm=TRUE))) %>% 
  as.data.frame() %>%
  mutate_at(-1, round, 2)
  
summSD <- data %>%
  select(cyl, mpg) %>%
  group_by(cyl) %>%
  summarise_each(funs(sd(., na.rm=TRUE))) %>% 
  as.data.frame() %>%
  mutate_at(-1, round, 2)

# Median/MAD - Car weight and horsepower
summMedian <- data %>%
  select(cyl, wt, hp) %>%
  group_by(cyl) %>%
  summarise_each(funs(median(., na.rm=TRUE))) %>% 
  as.data.frame() %>%
  mutate_at(-1, round, 2)
  
summMAD <- data %>%
  select(cyl, wt, hp) %>%
  group_by(cyl) %>%
  summarise_each(funs(mad(., na.rm=TRUE))) %>% 
  as.data.frame() %>%
  mutate_at(-1, round, 2)


# Counts/% - Transmission
summTrans <- as.data.frame(table(data$am, data$cyl))
summTrans$p <- c(round(summTrans$Freq[1:2]/sum(summTrans$Freq[1:2])*100,1),
                 round(summTrans$Freq[3:4]/sum(summTrans$Freq[3:4])*100,1),
                 round(summTrans$Freq[5:6]/sum(summTrans$Freq[5:6])*100,1))


Perform some statistical tests for the table.


summTests <- data.frame(Stat = c(round(tidy(aov(data$mpg ~ data$cyl))$statistic[1], 3),
                                 round(tidy(aov(data$wt ~ data$cyl))$statistic[1], 3),
                                 round(tidy(aov(data$hp ~ data$cyl))$statistic[1], 3),
                                 round(chisq.test(data$cyl, data$am)$statistic, 3)),
                        Pvals = c(signif(tidy(aov(data$mpg ~ data$cyl))$p.value[1], 3),
                                   signif(tidy(aov(data$wt ~ data$cyl))$p.value[1], 3),
                                   signif(tidy(aov(data$hp ~ data$cyl))$p.value[1], 3),
                                   signif(chisq.test(data$cyl, data$am)$p.value, 3)),
                        stringsAsFactors = FALSE)


Build Table


Now we need to build a table and save it as a data frame. It is important to add space where needed in the table (i.e. when adding the test statistic and p-value for a categorical variable). Any formatting (bold, italics) may be done while building the table, as I have, but you may also parse argumaents later as KableExtra arguments.


t1 <- data.frame(var = c(paste0("**Count**", "  ", "*n (%)*"),
                         "",
                         paste0("**Miles/Gallon**", "  ", "*Mean (SD)*"),
                         paste0("**Car Weight (1000 lbs)**", "  ", "*Median (MAD)*"),
                         paste0("**Horsepower**", "  ", "*Median (MAD)*"),
                         paste0("**Transmission**", "  ", "*n (%)*"),
                         "*Automatic*", "*Manual*"),
                 
                 cyl4 = c(paste0(summCounts$Freq[1], " (", summCounts$p[1], "%)"),
                          "",
                          paste0(summMean$mpg[1], " (", summSD$mpg[1], ")"),
                          paste0(summMedian[1,2:3], " (", summMAD[1,2:3], ")"),
                          "",
                          paste0(summTrans$Freq[1:2], " (", summTrans$p[1:2], "%)")),
                 
                 cyl6 = c(paste0(summCounts$Freq[2], " (", summCounts$p[2], "%)"),
                          "",
                          paste0(summMean$mpg[2], " (", summSD$mpg[2], ")"),
                          paste0(summMedian[2,2:3], " (", summMAD[2,2:3], ")"),
                          "",
                          paste0(summTrans$Freq[3:4], " (", summTrans$p[3:4], "%)")),
                 
                 cyl8 = c(paste0(summCounts$Freq[3], " (", summCounts$p[3], "%)"),
                          "",
                          paste0(summMean$mpg[3], " (", summSD$mpg[3], ")"),
                          paste0(summMedian[3,2:3], " (", summMAD[3,2:3], ")"),
                          "",
                          paste0(summTrans$Freq[5:6], " (", summTrans$p[5:6], "%)")),
                 
                 stat = c("", "", 
                          paste0(summTests$Stat[1:3], footnote_marker_number(1)), "",  #add footnote for diff tests
                          paste0(summTests$Stat[4], footnote_marker_number(2)), ""),
                 
                 pval = c("", "", summTests$Pvals[1:3], "", summTests$Pvals[4], ""),
                 
                 stringsAsFactors = FALSE)

# Print table to check format, which will help with parsing to the table function
t1 
##                                         var         cyl4         cyl6
## 1                        **Count**  *n (%)*   11 (34.4%)    7 (21.9%)
## 2                                                                    
## 3             **Miles/Gallon**  *Mean (SD)* 26.66 (4.51) 19.74 (1.45)
## 4 **Car Weight (1000 lbs)**  *Median (MAD)*   2.2 (0.54)  3.21 (0.36)
## 5            **Horsepower**  *Median (MAD)*   91 (32.62)   110 (7.41)
## 6                 **Transmission**  *n (%)*                          
## 7                               *Automatic*    3 (27.3%)    4 (57.1%)
## 8                                  *Manual*    8 (72.7%)    3 (42.9%)
##            cyl8               stat     pval
## 1    14 (43.8%)                            
## 2                                          
## 3   15.1 (2.56) 39.698<sup>1</sup> 4.98e-09
## 4   3.75 (0.41) 22.911<sup>1</sup> 1.07e-06
## 5 192.5 (44.48) 36.177<sup>1</sup> 1.32e-08
## 6                                          
## 7    12 (85.7%)  8.741<sup>2</sup>   0.0126
## 8     2 (14.3%)


Lastly, you parse your table data frame into the Kable and KableExtra functions. You may use the reference link above to edit the formatting.


# HTML Table Function
kable(t1,
      caption = "**Summary Table of mtcars Dataset**",
      col.names = c("", "4 Cylinders", "6 Cylinders", "8 Cylinders", "Test", "P-Value"),
      align="lccccc", escape = FALSE) %>%
  
  # KableExtra Arguments
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  add_indent(c(7,8)) %>%
  footnote(number = c("ANOVA F Statistic", "Chi-Square Test"),
           general_title = "",
           footnote_as_chunk = T)
Summary Table of mtcars Dataset
4 Cylinders 6 Cylinders 8 Cylinders Test P-Value
Count n (%) 11 (34.4%) 7 (21.9%) 14 (43.8%)
Miles/Gallon Mean (SD) 26.66 (4.51) 19.74 (1.45) 15.1 (2.56) 39.6981 4.98e-09
Car Weight (1000 lbs) Median (MAD) 2.2 (0.54) 3.21 (0.36) 3.75 (0.41) 22.9111 1.07e-06
Horsepower Median (MAD) 91 (32.62) 110 (7.41) 192.5 (44.48) 36.1771 1.32e-08
Transmission n (%)
Automatic 3 (27.3%) 4 (57.1%) 12 (85.7%) 8.7412 0.0126
Manual 8 (72.7%) 3 (42.9%) 2 (14.3%)
1 ANOVA F Statistic 2 Chi-Square Test




Example 2: Model Summary Tables


The second example will use the R dataset ChickWeight. First, we’ll assign the data to an object and check the structure of the data frame.


data <- ChickWeight 

head(data)
## Grouped Data: weight ~ Time | Chick
##   weight Time Chick Diet
## 1     42    0     1    1
## 2     51    2     1    1
## 3     59    4     1    1
## 4     64    6     1    1
## 5     76    8     1    1
## 6     93   10     1    1
str(data)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame':   578 obs. of  4 variables:
##  $ weight: num  42 51 59 64 76 93 106 125 149 171 ...
##  $ Time  : num  0 2 4 6 8 10 12 14 16 18 ...
##  $ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
##  $ Diet  : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "formula")=Class 'formula'  language weight ~ Time | Chick
##   .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
##  - attr(*, "outer")=Class 'formula'  language ~Diet
##   .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
##  - attr(*, "labels")=List of 2
##   ..$ x: chr "Time"
##   ..$ y: chr "Body weight"
##  - attr(*, "units")=List of 2
##   ..$ x: chr "(days)"
##   ..$ y: chr "(gm)"


Brief Data Preparation and Exploration


Before running the model, it’s good to check the variables formats and distributions (NOTE: in this case, the Chick and Diet variables were already factored).


# Relabel diet factor
data$Diet <- factor(data$Diet, labels = c("Diet1", "Diet2", "Diet3", "Diet4"))

# Explore Distribution Over Time
ggplot(data, aes(x = Time, y = weight, col=Chick)) + 
  geom_line () + geom_point() + 
  facet_wrap(~Diet) +
  theme_bw() + theme(legend.position = "none") +
  ylab("Weight (g)") + xlab("Age (days)")


Fit Models


We’re going to fit a linear mixed effects model to the data, with fixed effects Diet and Time, and a random intercept and slope for each Chick. We’re also going to fit Diet2 since there is evidence of a quadratic relationship. Further, we’ve added a 1st order autocorrelation and the intercept of the main effect variable is also set to 0 (via the -1). This makes sense biologically, but it is also the only way to make a model with a serial autocorrelation converge.

These steps are detailed further at this link.


# Full model
m1 <- lmer(weight ~ Diet * poly(Time,2)-1 + ((Time-1)|Chick), data = ChickWeight)

m1ci <- confint.merMod(m1)

summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: weight ~ Diet * poly(Time, 2) - 1 + ((Time - 1) | Chick)
##    Data: ChickWeight
## 
## REML criterion at convergence: 4669.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0825 -0.5313  0.0952  0.5013  3.4647 
## 
## Random effects:
##  Groups   Name Variance Std.Dev.
##  Chick    Time   6.607   2.57   
##  Residual      156.113  12.49   
## Number of obs: 578, groups:  Chick, 50
## 
## Fixed effects:
##                      Estimate Std. Error t value
## Diet1                 101.166      6.356  15.917
## Diet2                 120.862      8.786  13.756
## Diet3                 140.579      8.786  16.000
## Diet4                 134.251      8.788  15.276
## poly(Time, 2)1       1032.903     97.999  10.540
## poly(Time, 2)2         70.347     20.593   3.416
## Diet2:poly(Time, 2)1  361.236    166.626   2.168
## Diet3:poly(Time, 2)1  813.581    166.626   4.883
## Diet4:poly(Time, 2)1  519.814    166.756   3.117
## Diet2:poly(Time, 2)2   52.718     34.288   1.537
## Diet3:poly(Time, 2)2  209.527     34.288   6.111
## Diet4:poly(Time, 2)2   -6.275     34.830  -0.180
## 
## Correlation of Fixed Effects:
##             Diet1  Diet2  Diet3  Diet4  p(T,2)1 p(T,2)2 D2:(T,2)1 D3:(T,2)1
## Diet2        0.000                                                         
## Diet3        0.000  0.000                                                  
## Diet4        0.000  0.000  0.000                                           
## poly(Tm,2)1  0.970  0.000  0.000  0.000                                    
## poly(Tm,2)2  0.006  0.000  0.000  0.000  0.018                             
## Dt2:p(T,2)1 -0.570  0.785  0.000  0.000 -0.588  -0.011                     
## Dt3:p(T,2)1 -0.570  0.000  0.785  0.000 -0.588  -0.011   0.346             
## Dt4:p(T,2)1 -0.570  0.000  0.000  0.785 -0.588  -0.011   0.346     0.346   
## Dt2:p(T,2)2 -0.003 -0.001  0.000  0.000 -0.011  -0.601   0.003     0.006   
## Dt3:p(T,2)2 -0.003  0.000 -0.001  0.000 -0.011  -0.601   0.006     0.003   
## Dt4:p(T,2)2 -0.003  0.000  0.000  0.003 -0.011  -0.591   0.006     0.006   
##             D4:(T,2)1 D2:(T,2)2 D3:(T,2)2
## Diet2                                    
## Diet3                                    
## Diet4                                    
## poly(Tm,2)1                              
## poly(Tm,2)2                              
## Dt2:p(T,2)1                              
## Dt3:p(T,2)1                              
## Dt4:p(T,2)1                              
## Dt2:p(T,2)2  0.006                       
## Dt3:p(T,2)2  0.006     0.361             
## Dt4:p(T,2)2  0.009     0.355     0.355
# Null model (to test whether diet is a significant predictor)
m2 <- lmer(weight ~ poly(Time,2)-1 + ((Time-1)|Chick), data = ChickWeight)

m2ci <- confint.merMod(m2)

summary(m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: weight ~ poly(Time, 2) - 1 + ((Time - 1) | Chick)
##    Data: ChickWeight
## 
## REML criterion at convergence: 4970.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4530 -0.5542  0.0874  0.5518  3.2246 
## 
## Random effects:
##  Groups   Name Variance Std.Dev.
##  Chick    Time 132.6    11.52   
##  Residual      176.7    13.29   
## Number of obs: 578, groups:  Chick, 50
## 
## Fixed effects:
##                Estimate Std. Error t value
## poly(Time, 2)1  -435.89      15.82 -27.547
## poly(Time, 2)2   125.84      13.42   9.376
## 
## Correlation of Fixed Effects:
##             p(T,2)1
## poly(Tm,2)2 0.012


Build Table


Now we need to build a model table and save it as a data frame. This table will include the important information needed to compare the full and null models.


t2 <- data.frame(vars = c("**Main Effects**",
                              "**Diet 1**", "**Diet 2**", "**Diet 3**", "**Diet 4**", "**Time**", "**Time^2^**",
                           "**Interaction Effects**",
                              "**Diet x Time**", 
                                  "*Diet 1*", "*Diet 2*", "*Diet 3*", "*Diet 4*",
                              "**Diet x Time^2^**",
                                  "*Diet 1*", "*Diet 2*", "*Diet 3*", "*Diet 4*",
                           "**Random Effects**",
                              paste0("**Residual**", "  ", "$\\boldsymbol \\sigma$"),
                           "**Model Statistics**",
                              "**Obs**", "**AIC**", "**BIC**"),
                  
                 model1 = c("", 
                            paste0(round(coef(summary(m1))[1:6,1],2), " [", 
                                   round(m1ci[3:8,1],2), ", ", round(m1ci[3:8,2],2), "]"), 
                            "", "", "Ref",
                            paste0(round(coef(summary(m1))[7:9,1],2), " [", 
                                   round(m1ci[9:11,1],2), ", ", round(m1ci[9:11,1],2), "]"), 
                            "", "Ref",
                            paste0(round(coef(summary(m1))[10:12,1],2), " [", 
                                   round(m1ci[12:14,1],2), ", ", round(m1ci[12:14,2],2), "]"),
                            "", 
                            round(summary(m1)$sigma,2),
                            "",
                            summary(m1)$ngrps, round(AIC(m1),2), round(BIC(m1),2)),
                 
                 
                 
                 
                  model2 = c("", "", "", "", "",
                            paste0(round(coef(summary(m2))[1:2,1],2), " [", 
                                   round(m2ci[3:4,1],2), ", ", round(m2ci[3:4,2],2), "]"), 
                            "", "", "", "", "",
                            "", "", "", "", 
                            "",
                            "", "", 
                            round(summary(m2)$sigma,2), 
                            "", 
                            summary(m2)$ngrps, round(AIC(m2),2), round(BIC(m2),2)),
                 
                 stringsAsFactors = FALSE
)


Lastly, you parse your table data frame into the Kable and KableExtra functions.


kable(t2,
      caption = "**Model Summary Table of ChickWeight Dataset**",
      col.names = c("", "Model 1", "Model 2"),
      align="lcc", escape = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  add_indent(c(10:13,15:18)) %>%
  row_spec(0, bold = T, background = "#666666", color = "white") %>%
  row_spec(c(1,8,19,21), bold = T, background = "#CCCCCC")
Model Summary Table of ChickWeight Dataset
Model 1 Model 2
Main Effects
Diet 1 101.17 [88.98, 113.34]
Diet 2 120.86 [104.03, 137.69]
Diet 3 140.58 [123.75, 157.41]
Diet 4 134.25 [117.42, 151.09]
Time 1032.9 [845.1, 1220.6] -435.89 [-466.9, -404.87]
Time2 70.35 [30.34, 110.6] 125.84 [99.54, 152.14]
Interaction Effects
Diet x Time
Diet 1 Ref
Diet 2 361.24 [42.11, 42.11]
Diet 3 813.58 [494.46, 494.46]
Diet 4 519.81 [200.44, 200.44]
Diet x Time2
Diet 1 Ref
Diet 2 52.72 [-14.22, 119.41]
Diet 3 209.53 [142.59, 276.22]
Diet 4 -6.28 [-74.23, 61.51]
Random Effects
Residual \(\boldsymbol \sigma\) 12.49 13.29
Model Statistics
Obs 50 50
AIC 4697.58 4978.46
BIC 4758.61 4995.9