Introduction

Code

##############################################################
#            easytable-1 function                         #
##############################################################

#Hit ctrl+A, select all the text here and run this entire script (easytable1.R)
#The first time you run this, it might take a while to install a
#the expss and flextable packagage. Please be patient! It gets much
#quicker on subsequent runs. 
#Read through the instructions here for details on using easytable1().
#This script also generates a sample dataset called mydat, and demonstrates,
#with examples, how to format the input dataset.
#Once the entire script is run successfully, open up your script 
#and use the function to get a formatted table1 !




#############################
# About easytable1()
#############################


#easytable1 function: this is a function I wrote to build descriptive statistics that
#are usually seen in the table1s that my colleagues use at BIDMC, Boston MA.
#we usually use this type of table for clinical trials where the subjects are randomized
#between two groups, a treatment and a control.
#This is a personal project that I embarked on to make my day to day routine work easier. 
#This isn't an official BIDMC endorsed projec.


# This function relies on the expss package by Gregory Demin a
# https://cran.r-project.org/web/packages/expss/expss.pdf
# to format the input labels, and flextable package by David Gohel
# https://davidgohel.github.io/flextable/articles/overview.html) to format the output. 
# This function would not have been possible without their wonderful code.
# Mac users may have trouble with installing the flextable package.
# please refer to this post for a fix. 
# https://stackoverflow.com/questions/48199737/cant-install-flextable
# Sorry for the inconvenience.


#The general structure of easytable1() function is as follows:

#easytable1(numerics = c("insertnumericvarnamehere", "insertnumericvarnamehere", ...),
#            logicals = c("insertlogicalvarnamehere", "insertlogicalvarnamehere", ...),
#            factors =  c("insertfactorvarnamehere", "insertfactorvarnamehere", ...), # Atleast one of numerics,logicals,or factors is mandatory.
#                by = "inserttreatgroupvarnamehere",                                  # A by group is mandatory
#               data = dataframename,                                                 # This is your input dataframe. Enter the name of the dataframe without quotes. This is mandatory.
#               title = "inserttabletitlehere")                                       # Insert table title here. This is optional.
 

#The output of the easytable1() function is:
# A mean (SD) if the numeric variable is normally distributed, or a median and IQR if the numeric variable is not normally distributed.
# A count and % for levels of factor variables, and for binary (logical variables)
# These statistics are stratified by treatment groups.
# #Missing values are excluded for calculations of variable level counts, percentages, means, SDs, medians and IQRs.




#Load your sample dataset (mydat)
#---------------------------------------------------------------

#Create dataset
id <- seq(from = 1,
          to = 50,
          by = 1 )
#set seed
set.seed(125)


#create treatment group codes.
treatgrp <- sample(x =  c(0, 1), #0 represents placebo, 1 represents treated
                   size = 50,
                   replace = TRUE,
                   prob = c(0.5, 0.5))

#continuous variable:normally distributed
hemoglobin <- rnorm(n = 50, 
                    mean = 11.0,
                    sd = 1.5)
shapiro.test(hemoglobin)
## 
##  Shapiro-Wilk normality test
## 
## data:  hemoglobin
## W = 0.97231, p-value = 0.2868
#continous variable:skew distribution
weights <- sort(rnorm(n = 50, 
                      mean = 172.3,
                      sd = 29))
skewedweights <- sample(weights, 
                        size = 50,
                        replace = TRUE,
                        prob = c(50:31, 30:16, 1:15))
#hist(skewedweights)
shapiro.test(skewedweights)
## 
##  Shapiro-Wilk normality test
## 
## data:  skewedweights
## W = 0.92818, p-value = 0.004719
#Create binary (dichotomous) variable
received_blood <- sample(x =  c(0, 1),
                         size = 50,
                         replace = TRUE,
                         prob = c(0.5, 0.5))

#Another binary variable
insurance_status <- sample(x = c(0, 1),
                           size = 50,
                           replace = TRUE,
                           prob = c(0.7, 0.3))

#Another binary variable
survived <- sample(x = c(0, 1),
                   size = 50,
                   replace = TRUE,
                   prob = c(0.5, 0.5))


#create factor variables - race and hospital disposition

racevec <- c("White", "African-American", "Asian", "Alaska Native/American Indian")
race <- sample( x = racevec,
                size = 50,
                replace = TRUE,
                prob = c(0.7, 0.2, 0.6, 0.4))

hospvec <- c("Home", "Home with home care", "Nursing facility", "Rehab", "Retained")
hospdisp <- sample( x = hospvec,
                    size = 50,
                    replace = TRUE,
                    prob = c(rep(0.20, 5)))

#bind all variables together in a dataset
mydat <- data.frame(ID = id,
                    treat = treatgrp,
                    hb = hemoglobin,
                    weight = skewedweights,
                    blood_rcv = received_blood,
                    insured = insurance_status,
                    race = race,
                    disp = hospdisp)


#make sure mydat is cleaned and coded appropriately.
str(mydat)
## 'data.frame':    50 obs. of  8 variables:
##  $ ID       : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ treat    : num  0 1 1 1 0 0 0 1 0 0 ...
##  $ hb       : num  11.54 8.24 11.84 12.34 11.59 ...
##  $ weight   : num  163 156 166 182 120 ...
##  $ blood_rcv: num  0 0 1 0 0 0 1 0 0 1 ...
##  $ insured  : num  0 0 1 1 0 1 0 0 0 0 ...
##  $ race     : Factor w/ 4 levels "African-American",..: 4 1 3 4 4 4 2 2 4 1 ...
##  $ disp     : Factor w/ 5 levels "Home","Home with home care",..: 2 2 1 2 5 4 3 2 4 5 ...
mydat$ID <- as.integer(mydat$ID)

#-----------------------------------------------------------------

#View the structure of the sample dataset mydat here
str(mydat)
## 'data.frame':    50 obs. of  8 variables:
##  $ ID       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ treat    : num  0 1 1 1 0 0 0 1 0 0 ...
##  $ hb       : num  11.54 8.24 11.84 12.34 11.59 ...
##  $ weight   : num  163 156 166 182 120 ...
##  $ blood_rcv: num  0 0 1 0 0 0 1 0 0 1 ...
##  $ insured  : num  0 0 1 1 0 1 0 0 0 0 ...
##  $ race     : Factor w/ 4 levels "African-American",..: 4 1 3 4 4 4 2 2 4 1 ...
##  $ disp     : Factor w/ 5 levels "Home","Home with home care",..: 2 2 1 2 5 4 3 2 4 5 ...
#----------------------------------------------------------------------------------------------------------------
#             How to use the function                                                                           #
#----------------------------------------------------------------------------------------------------------------

#For this function to work smoothly, mydat's input should:



#-----------> have variables supplied as components of one of the three input arguments. For example
#             if you wish to have stats for the binary variable, 'insured', you should ensure it has values 1 for 
#             presence of insurance and 0 for absence of insurance, and then supply it to function as:

#             easytable1(logicals = "insured", by = "treat", data = mydat). 

#            Any variable that is coded 1 for the presence of a characteristic and 0 for the absence can be fed into
#            logicals =  argument, regardless of whether it is actually numeric or integer.



#             If you wanted to add stats for factor variable 'race', you should ensure that its levels are labelled, 
#             and then that the variable itself is labelled (see assigning variable labels), and then feed it to the 
#             function along with 'insured' as:
            
#              easytable1( factors = "race", logicals = "insured", by = "treat", data = mydat)


#             if you wanted to add stats for numeric variable weight, you don't have to do any labelling or value coding,
#             just supply it directly as :

#             easytable1(numerics = "weight", logicals = "insured", factors = "race", by = "treat", data = mydat)

#             Numeric variables that take on integer values (like IQ score) can also be supplied as part of the numerics argument.



#----------> should always have input dataframe and a by variable provided. Notice in the previous examples, 
#            we provided a by = "treat" and data = mydat regardless of what the input was.
#            By variable should only have values 1 and 0 (1 for intervention, 0 for placebo). This function
#            does not support presence of more groups.


# --------->  as the last step, have variable labels for input variables applied. For example, if 
#            disp and race are your input variables, ensure that they have appropriate variable labels applied, 
#            something like "Hospital Disposition", and "Racial Group".  It is important to remember that 
#            the individual levels of disp and race be labelled before the disp and race themselves are labelled.
#            This is demonstrated below.

#-----------------------------------------------------------------------------------------------#
# Assigning variable lables to input variables (Assign just before using easytable1() function)
#-----------------------------------------------------------------------------------------------#
#                         Demonstration of labelling: order of labelling     
#                        ---------------------------------------------------

# Say your input variables from mydat are disp (factor), race (factor), and hb (numeric)

# Before setting variable labels in your dataset, label the individual labels of the factor variables.

#         mydat$disp <- factor(mydat$disp, levels = c("hme", "hhc", "nrsing", "rehb", retain"),
#                                         labels = c("Home", "Home with home care", "Nursing facility", "Rehab", "Retained")
#         mydat$race <- factor(mydat$race, levels = c("afam", "an/ai", "asi", white"), labels = c("African-American", "Alaska Native/American Indian",
#                               "Asian", "White"))

# Now, label the factor variables (and other input variables, if present) themselves. You use the apply_labels() function from 
# the expss package. This script will automatically apply the 

#        mydat <- apply_labels(mydat, 
#                               race = "Race",
#                               disp = "Disposition",
#                                 hb = "Hemoglobin (g/dL)")

#  Notice, you do not have to label variables that you aren't inputting into the easytable1() function.
#  For example, I did not label "insured" because I do not to see it in my output table.

#Now, you are ready to use the easytable1() function.

#      easytable1(numerics = "hb", 
#                 factors = c("race", "disp"),
#                 by = "treat",
#                 data = mydat,
#                 title = "Table1: Only 3 variables") 


#Loading expss package and assigning variable labels to sample dataset (sample dataset factors already have labels applied to levels)
#-------------------------------------------------------------------------------------------------------------------------------------

#library needed
labellibrary <- "expss"
for (l in labellibrary) {
  if(!require(l, character.only = TRUE))
    install.packages(l, dependencies = TRUE)
  library(l,character.only = TRUE)
}
## Loading required package: expss
## 
## Use 'expss_output_rnotebook()' to display tables inside R Notebooks.
##  To return to the console output, use 'expss_output_default()'.
#this installs and calls expss for user who do not have it already
#You need to use the apply_labels function from this package to have all your input variables 
#labelled. 


#for example: if mydat is your input dataset
#and you want a table that calculates statistics for variables hb, weight, blood_rcv, insured, race and disp
#all 6 of these these variables MUST be labelled. Other variables that are not being fed into
#easytable1() do not need to be labelled.

#label your variables as demonstrated below:
mydat <- apply_labels(mydat,
                      hb = "Hemoglobin (g/dL)",
                      weight = "Weight (lbs)",
                      blood_rcv = "n (%) received blood",
                      insured = "n (%) with insurance",
                      race = "Race",
                      disp = "Disposition")
# Notice, we did not have to label variables that we were not 
# inputting into the function. For example, ID was not being fed into
# the easytable1 function, and so it did not need to be labelled.


#-------------------------------------------------------------------------------------------------------------------

#function begins

easytable1 <- function(numerics, logicals, factors, data, by, title){ #input dataset should be a labelled dataset
  
  
  #----------------------------------------------------------------------------------------
  
  #blanket warning for missing inputs
  if ((missing(factors) & missing(numerics) & missing(logicals))|missing(data)|missing(by)) {
    stop("Please provide the name of atleast one input column, a 'by' column, and a dataset")
  }  
  
  #install required packages
  requiredPackages <- c("flextable", "magrittr")
  
  for (p in requiredPackages) {
    if(!require(p, character.only = TRUE))
      install.packages(p, dependencies = TRUE)
    library(p,character.only = TRUE)
  }
  
  #-----------------------------------------------------------------------------------------
  
  #make 2 subsets of data using the 'by' input
  x <- data[data[[by]] == "1", ] #by represents a column name with quotes. Provided in input.
  y <- data[data[[by]] == "0", ]
  
  
  #save lengths to use later in table labels
  num_int <- nrow(x)
  num_con <- nrow(y)
  
  #-----------------------------------------------------------------------------------------
  
  #empty dataframe
  emptydat <- data.frame( Type = character(),
                          var = character(),
                          metric = character())
  
  
  #-------------------------------------------------------------------------------------
  #1st loop: For numerics from x
  #------------------------------------------------------------------------------------
  
  if (!(missing(numerics))) { #open if clause - allows numerics to be missing in input
    
    
    for (i in numerics){ # open numerics loop
      
      #extract the p value
      shaptest <- shapiro.test(x[,i])
      pvalue <- shaptest[[2]]
      
      #if data is normal
      if (pvalue > 0.05) {
        
        mean <- round(mean(x[,i], na.rm = TRUE), 1) #NAs omitted from calculations, in both mean and SD
        sd <- round(sd(x[,i], na.rm = TRUE), 1) 
        
        meanchar <- formatC(mean, format = "f", digits = 1) #Convert data to character for formatted display
        sdchar <- formatC(sd, format = "f", digits = 1)
        combine <- paste0(meanchar, " ", "(", sdchar, ")")
        
        #create loop dataframe for current iteration
        loopdata <- data.frame(Type = "Numeric",         #This shows up in row label (Flextable)
                               var = var_lab(x[,i]),  #This is display label for the variable. 
                               metric = combine)
        
        #add this iteration's data to empty dataset  
        emptydat <- rbind(emptydat, loopdata)
        
        
      } #if ends: for normal data
      #if data is not normal
      else                {
        
        #calculate median and IQR
        median <- median(x[, i], na.rm = TRUE)#NAs omitted.
        quart1 <- round(quantile(x[, i] , 0.25), 1)
        quart3 <- round(quantile(x[, i] , 0.75), 1)
        
        #change them to characters, and combine to display with formatting.
        medchar <- formatC(median, format = "f", digits = 1)
        quart1char <- formatC(quart1, format = "f", digits = 1)
        quart3char <- formatC(quart3, format = "f", digits = 1)
        combine <- paste0(medchar, " ", "(", quart1char, ", ", quart3char, ")")
        
        #Create the loop dataframe
        loopdata <- data.frame(Type = "Numeric",         #This shows up in row label (Flextable)
                               var = var_lab(x[,i]),  #This is display label for the variable. 
                               metric = combine)
        
        
        #add this iteration's data to empty dataset  
        emptydat <- rbind(emptydat, loopdata)
      } #else ends: for non-normal data
    } #close numerics loop
  } #close if clause to allow missing numerics in input 
  
  #-------------------------------------------------------------------------------------
  # 2nd loop: For logicals from x
  #------------------------------------------------------------------------------------ 
  
  if (!(missing(logicals))) { #open if clause - allows logicals to be missing in input
    
    
    for (i in logicals) { #open loop for logicals
      
      count <- sum(x[, i])                                     #get count for 1s in logical column
      table <- table(x[, i], useNA = "no")                     #prepare a table. Using a sum on this table gets you counts of this logical vector with NAs excluded.
      denom <- sum(table)                                         #get the denominator
      pct <- round((count*100/denom),digits = 1)                  #get pctages (rounded to 1 decimal point)
      
      countchar <- formatC(count)                                 #convert the counts and pcts to character, and combine them with appropriate formatting.
      pctchar <- formatC(pct, format = "f", digits = 1)
      combine <- paste0(countchar, " ", "(", pctchar, " ", "%", ")")
      
      #Create the loop dataframe
      loopdata <- data.frame(Type = "Logical",         #This shows up in row label (Flextable)
                             var = var_lab(x[,i]),  #This is display label for the variable. 
                             metric = combine)
      
      
      #Bind this iteration of logical data to the skeleton data. Note: It should already have the values for the numeric variables.
      emptydat <- rbind(emptydat, loopdata) 
    } #close logicals loop
  } #close if clause to allow missing logicals in input
  
  #-------------------------------------------------------------------------------------
  # 3rd loop: For factor from x
  #------------------------------------------------------------------------------------ 
  
  if (!(missing(factors))) { #open if clause - allows factors to be missing in input
    
    
    for (i in factors) {
      
      table_fac <- table(x[ , i], useNA = "no") #create a table of factor variable currently in loop. Make sure missing values are not counted in denominator.
      nn <- names(table_fac)                       #Store the names of the levels of factor in nn. They should have already been labelled.
      list_fac <- lapply(table_fac, function(r) list(FREQ = r,
                                                     PCTnoNA = 100*r/sum(table_fac)))
      # r just accesses frequencies from table
      # look closely at the PCTnoNA denominator
      # it is the sum(table_fac)
      # This is the sum of the counts of the factors
      # listed out in the table. Since we excluded  
      # missing values from the table using useNA = "no"
      # the denominator is calculated excluding NAs.
      
      unlist_fac <- unlist(list_fac)                                   #unlist to break up into components that can be arranged in a matrix
      fac_matrix <- t(matrix(unlist_fac, nrow = 2, byrow = FALSE))     #create the matrix
      rownames(fac_matrix) <- nn                                       #apply names of factors (from table) to matrix
      fac_df <- data.frame(fac_matrix)
      
      #Manipulate dataframe to hold counts and percentages. Look into making code cleaner.
      fac_df$freqchar <- formatC(fac_df$X1)
      fac_df$pctchar <- formatC(fac_df$X2, format = "f", digits = 1)
      fac_df$combine <- paste0(fac_df$freqchar, " ", "(", fac_df$pctchar, " ", "%", ")")
      
      #keep only column which is called combine.
      fac_df <- data.frame(fac_df[, which(names(fac_df) == "combine")])
      
      #This subsetting changes column name, unfortunately. So rename it.
      names(fac_df) <- "metric"
      fac_df$var <- nn                                             # Factor labels applied here
      fac_df$Type <- paste0("Categorical -", var_lab(data[,i]))    # Variable Label applied here. It is combined with a string that calls it Categorical.
      
      #reorder                                                    #This is done to ensure that columns are in the same order as empty dataset. Fcilitates rbind.
      order <- c("Type", "var", "metric")
      fac_df <- fac_df[, order]
      
      #add to empty dataset
      emptydat <- rbind(emptydat, fac_df)
    } #close factor loop
  } #close if clause - allows factors to be missing in input 
  
  

  #Save intervention data:
  intervention <- emptydat
  
  
  #-----------------------------------------------------------------------------------------
  
  #******************************************************************************************
  #Group 2: Working with the control group (y)
  #******************************************************************************************        
  
  #Repeat all the above steps, but make sure to change:
  
  # x to y (calculate stats only on the subset of data that has treatment code = 0)
  
  
  
  # Do not drop Type and var. We will drop it at the end. Idea is to avoid changing code.
  # Drop Type and var. These already exist in intervention, do not have to repeated here, will be the same for the two groups.  
  
  #No NEED TO CHANGE:
  # emptydat_X to emptydat_Y. 
  # metric to metric0 , to reflect that these are being calculated for treatment code = 0
  # Add suffix _y to all objects within function to distinguish these objects between 2 groups.
  #BECAUSE RESULTS FROM FIRST DATASET ARE ALREADY SAVED IN INTERVENTION
  

  
  #empty dataframe
  emptydat <- data.frame( Type = character(),
                          var = character(),
                          metric = character())
  
  
  #-------------------------------------------------------------------------------------
  #1st loop: For numerics from y
  #------------------------------------------------------------------------------------
  
  if (!(missing(numerics))) { #open if clause - allows numerics to be missing in input
    
    
    for (i in numerics){ # open numerics loop
      
      #extract the p value
      shaptest <- shapiro.test(y[,i])
      pvalue <- shaptest[[2]]
      
      #if data is normal
      if (pvalue > 0.05) {
        
        mean <- round(mean(y[,i], na.rm = TRUE), 1) #NAs omitted from calculations, in both mean and SD
        sd <- round(sd(y[,i], na.rm = TRUE), 1) 
        
        meanchar <- formatC(mean, format = "f", digits = 1) #Convert data to character for formatted display
        sdchar <- formatC(sd, format = "f", digits = 1)
        combine <- paste0(meanchar, " ", "(", sdchar, ")")
        
        #create loop dataframe for current iteration
        loopdata <- data.frame(Type = "Numeric",         #This shows up in row label (Flextable)
                               var = var_lab(y[,i]),  #This is display label for the variable. 
                               metric = combine)
        
        #add this iteration's data to empty dataset  
        emptydat <- rbind(emptydat, loopdata)
        
        
      } #if ends: for normal data
      #if data is not normal
      else                {
        
        #calculate median and IQR
        median <- median(y[, i], na.rm = TRUE)#NAs omitted.
        quart1 <- round(quantile(data[, i] , 0.25), 1)
        quart3 <- round(quantile(data[, i] , 0.75), 1)
        
        #change them to characters, and combine to display with formatting.
        medchar <- formatC(median, format = "f", digits = 1)
        quart1char <- formatC(quart1, format = "f", digits = 1)
        quart3char <- formatC(quart3, format = "f", digits = 1)
        combine <- paste0(medchar, " ", "(", quart1char, ", ", quart3char, ")")
        
        #Create the loop dataframe
        loopdata <- data.frame(Type = "Numeric",         #This shows up in row label (Flextable)
                               var = var_lab(y[,i]),  #This is display label for the variable. 
                               metric = combine)
        
        
        #add this iteration's data to empty dataset  
        emptydat <- rbind(emptydat, loopdata)
      } #else ends: for non-normal data
    } #close numerics loop
  } #close if clause to allow missing numerics in input 
  
  #-------------------------------------------------------------------------------------
  # 2nd loop: For logicals
  #------------------------------------------------------------------------------------ 
  
  if (!(missing(logicals))) { #open if clause - allows logicals to be missing in input
    
    
    for (i in logicals) { #open loop for logicals
      
      count <- sum(y[, i])                                     #get count for 1s in logical column
      table <- table(y[, i], useNA = "no")                     #prepare a table. Using a sum on this table gets you counts of this logical vector with NAs excluded.
      denom <- sum(table)                                         #get the denominator
      pct <- round((count*100/denom),digits = 1)                  #get pctages (rounded to 1 decimal point)
      
      countchar <- formatC(count)                                 #convert the counts and pcts to character, and combine them with appropriate formatting.
      pctchar <- formatC(pct, format = "f", digits = 1)
      combine <- paste0(countchar, " ", "(", pctchar, " ", "%", ")")
      
      #Create the loop dataframe
      loopdata <- data.frame(Type = "Logical",         #This shows up in row label (Flextable)
                             var = var_lab(y[,i]),  #This is display label for the variable. 
                             metric = combine)
      
      
      #Bind this iteration of logical data to the skeleton data. Note: It should already have the values for the numeric variables.
      emptydat <- rbind(emptydat, loopdata) 
    } #close logicals loop
  } #close if clause to allow missing logicals in input
  
  #-------------------------------------------------------------------------------------
  # 3rd loop: For factor
  #------------------------------------------------------------------------------------ 
  
  if (!(missing(factors))) { #open if clause - allows factors to be missing in input
    
    
    for (i in factors) {
      
      table_fac <- table(y[ , i], useNA = "no") #create a table of factor variable currently in loop. Make sure missing values are not counted in denominator.
      nn <- names(table_fac)                       #Store the names of the levels of factor in nn. They should have already been labelled.
      list_fac <- lapply(table_fac, function(r) list(FREQ = r,
                                                     PCTnoNA = 100*r/sum(table_fac)))
      # r just accesses frequencies from table
      # look closely at the PCTnoNA denominator
      # it is the sum(table_fac)
      # This is the sum of the counts of the factors
      # listed out in the table. Since we excluded  
      # missing values from the table using useNA = "no"
      # the denominator is calculated excluding NAs.
      
      unlist_fac <- unlist(list_fac)                                   #unlist to break up into components that can be arranged in a matrix
      fac_matrix <- t(matrix(unlist_fac, nrow = 2, byrow = FALSE))     #create the matrix
      rownames(fac_matrix) <- nn                                       #apply names of factors (from table) to matrix
      fac_df <- data.frame(fac_matrix)
      
      #Manipulate dataframe to hold counts and percentages. Look into making code cleaner.
      fac_df$freqchar <- formatC(fac_df$X1)
      fac_df$pctchar <- formatC(fac_df$X2, format = "f", digits = 1)
      fac_df$combine <- paste0(fac_df$freqchar, " ", "(", fac_df$pctchar, " ", "%", ")")
      
      #keep only column which is called combine.
      fac_df <- data.frame(fac_df[, which(names(fac_df) == "combine")])
      
      #This subsetting changes column name, unfortunately. So rename it.
      names(fac_df) <- "metric"
      fac_df$var <- nn                                             # Factor labels applied here
      fac_df$Type <- paste0("Categorical -", var_lab(y[,i]))    # Variable Label applied here. It is combined with a string that calls it Categorical.
      
      #reorder                                                    #This is done to ensure that columns are in the same order as empty dataset. Fcilitates rbind.
      order <- c("Type", "var", "metric")
      fac_df <- fac_df[, order]
      
      #add to empty dataset
      emptydat <- rbind(emptydat, fac_df)
    } #close factor loop
  } #close if clause - allows factors to be missing in input 
  
  

  #Save control data:
  control <- emptydat
  #drop Type and var from control (These contain exactly same info as 1st 2 columns in intervention)
  control <- control[, -c(1,2)]
  
  
  
  #-----------------------------------------------------------------------------------------
  
  
  
  #Bind both datasets:
  
  out <- cbind(intervention, control)
  names(out)[2] <- "Characteristic"
  names(out)[3] <- paste0("Intervention\nN = ", nrow(x))
  names(out)[4] <- paste0("Placebo\nN = ", nrow(y))
 
  
  
  #--------------------------------------------------------------------------------------------
  

  #return(out) #If you uncomment this, all subsequent lines dont run in function, and the output of function is a dataset
  
  #Create grouped dataset
  mygroupeddata <- as_grouped_data( x= out,
                                    groups = c("Type"))
  
  #return(mygroupeddata) #If you uncomment this, all subsequent lines dont run in function, and the output of function is a grouped dataset
  
  #Create flextable
  myflextable <- as_flextable( mygroupeddata ) %>%
    bold(j = 1, i = ~ !is.na(Type), bold = TRUE, part = "body" ) %>%
    bold(part = "header", bold = TRUE ) %>% autofit()
  
  
  if (!(missing(title))) {
    
    myflextable <- set_caption(myflextable, title) %>% autofit()

  }
  
  
  #end function: this is the only output  
  return(myflextable)
  
  
  
} #function ends

#----------------------------------------------------------------------------------------------




#Testing function on sample dataset.
#out <- easytable1(numerics = c("hb", "weight"),
#                    logicals = c("insured", "blood_rcv"),
#                    factors = c("race"),
#                    data = mydat,
#                    by = "treat")

#out

Using the function:

#Create dataset
id <- seq(from = 1,
          to = 50,
          by = 1 )
#set seed
set.seed(125)


#create treatment group codes.
treatgrp <- sample(x =  c(0, 1), #0 represents placebo, 1 represents treated
                   size = 50,
                   replace = TRUE,
                   prob = c(0.5, 0.5))

#continuous variable:normally distributed
hemoglobin <- rnorm(n = 50, 
                    mean = 11.0,
                    sd = 1.5)
shapiro.test(hemoglobin)

#continous variable:skew distribution
weights <- sort(rnorm(n = 50, 
                      mean = 172.3,
                      sd = 29))
skewedweights <- sample(weights, 
                        size = 50,
                        replace = TRUE,
                        prob = c(50:31, 30:16, 1:15))
#hist(skewedweights)
shapiro.test(skewedweights)

#Create binary (dichotomous) variable
received_blood <- sample(x =  c(0, 1),
                         size = 50,
                         replace = TRUE,
                         prob = c(0.5, 0.5))

#Another binary variable
insurance_status <- sample(x = c(0, 1),
                           size = 50,
                           replace = TRUE,
                           prob = c(0.7, 0.3))

#Another binary variable
survived <- sample(x = c(0, 1),
                   size = 50,
                   replace = TRUE,
                   prob = c(0.5, 0.5))


#create factor variables - race and hospital disposition

racevec <- c("White", "African-American", "Asian", "Alaska Native/American Indian")
race <- sample( x = racevec,
                size = 50,
                replace = TRUE,
                prob = c(0.7, 0.2, 0.6, 0.4))

hospvec <- c("Home", "Home with home care", "Nursing facility", "Rehab", "Retained")
hospdisp <- sample( x = hospvec,
                    size = 50,
                    replace = TRUE,
                    prob = c(rep(0.20, 5)))

#bind all variables together in a dataset
mydat <- data.frame(ID = id,
                    treat = treatgrp,
                    hb = hemoglobin,
                    weight = skewedweights,
                    blood_rcv = received_blood,
                    insured = insurance_status,
                    race = race,
                    disp = hospdisp)


#make sure mydat is cleaned and coded appropriately.
str(mydat)
mydat$ID <- as.integer(mydat$ID)

#-----------------------------------------------------------------

#View the structure of the sample dataset mydat here
str(mydat)

# apply function 
 out <- easytable1(numerics = c("hb", "weight"),
                   logicals = c("insured", "blood_rcv"),
                   factors = c("race"),
                   data = mydat,
                   by = "treat")

out