Table One

Table Construction

Subgroups of data

Now you are ready to build your table one. We will build a table one with three columns: one including the overall summary data and the next two representing different population subsets.

This first block of code is for building the different population subset columns

#This code chunk will help reorganize the row in the order you desire. 
#Otherwise the rows will be printed in alaphabetical order
desired_col_order <- c("Total", "Male", "Female", "All (m/sd)", "Males (m/sd)", 
                       "Females (m/sd)", "Hispanic", "White", "Black", "Asian", 
                       "Native American", "Other", "Not Available", "Spanish", 
                       "English", "Both", " Other", " Not Available")

#This code chunk manipulates and summarized your data_file. select()- place the columns 
#of interest. mutate()- create your desired population subgroups, group_by()- enter in the 
#col_name your created in the mutate() step, summarise()- summarize the columns how you please, 
#unite()- combine two columns (ex mean and sd), gather spread rotates your table into a classic 
#table 1 format, slice()- uses the desired_col_order you defined above to reorder the rows
diff_pop_subsets <- data_file %>%
  select(col_1_age, col_2_gender, col_3_race, col_4_lang) %>%
  mutate(pop_subset = case_when(col_1_age <= 17 ~ "Pediatric pts",
                                col_1_age >= 18 ~ "Adults"
                              )) %>%
  group_by(pop_subset) %>%
  summarise(AGE = round(mean(col_1_age, na.rm=TRUE), 0),
            asd = round(sd(col_1_age, na.rm = TRUE), 1),
            Males = round(mean(col_1_age[col_2_gender == "M"], na.rm = TRUE), 0), 
            Msd = round(sd(col_1_age[col_2_gender == "M"], na.rm = TRUE), 1),
            Females = round(mean(col_1_age[col_2_gender == "F"], na.rm = TRUE), 0),
            Fsd = round(sd(col_1_age[col_2_gender =="F"], na.rm = TRUE), 1),
            Male = sum(col_2_gender == "M"),
            Female = sum(col_2_gender == "F"),
            Total = sum(col_2_gender == "F" | col_2_gender == "M"),
            Hispanic = sum(col_3_race == "Hispanic"),
            White = sum(col_3_race == "White"),
            Black = sum(col_3_race == "Black"),
            Asian = sum(col_3_race == "Asian"),
            "Native American" = sum(col_3_race == "Native American"),
            Other = sum(Race = sum(col_3_race == "Other")),
            "Not Available" = sum(col_3_race == "N/A"),
            Spanish = sum(col_4_lang == "Spanish"),
            English = sum(col_4_lang == "English"),
            Both = sum(col_4_lang == "Spanish or English"),
            " Other" = sum(col_4_lang == "sign language" | col_4_lang == "Other"),
            " Not Available" = sum(col_4_lang == "n/a" | col_4_lang == "N/A")
            
        )  %>%
  unite(col = "All (m/sd)", AGE, asd, sep= " / ") %>%
  unite(col = "Males (m/sd)", Males, Msd, sep= " / ") %>%
  unite(col = "Females (m/sd)", Females, Fsd, sep= " / ") %>%
  gather (., key = "Variable", value = "value", -pop_subset) %>%
  spread(., key = pop_subset, value = value) %>%
  slice(match(desired_col_order, Variable)) #%>%

#print your table if you want to preview 
 diff_pop_subsets

## # A tibble: 18 x 3
##    Variable         Adults    `Pediatric pts`
##    <chr>            <chr>     <chr>          
##  1 Total            638       122            
##  2 Male             100       54             
##  3 Female           538       68             
##  4 All (m/sd)       44 / 12.5 10 / 4.8       
##  5 Males (m/sd)     43 / 15.2 9 / 5.2        
##  6 Females (m/sd)   44 / 11.9 11 / 4.2       
##  7 Hispanic         582       108            
##  8 White            19        8              
##  9 Black            1         0              
## 10 Asian            1         0              
## 11 Native American  1         0              
## 12 Other            2         0              
## 13 Not Available    32        6              
## 14 Spanish          445       62             
## 15 English          76        25             
## 16 Both             19        17             
## 17 " Other"         8         0              
## 18 " Not Available" 89        18

Overall data

This block of code is for building the overall column for your table 1 (including both population subsets from above)

#This code chunk will help reorganize the row in the order you desire. Otherwise the rows 
#will be printed in alaphabetical order
desired_col_order <- c("Total", "Male", "Female", "All (m/sd)", "Males (m/sd)", 
                       "Females (m/sd)", "Hispanic", "White", "Black", "Asian", 
                       "Native American", "Other", "Not Available", "Spanish", 
                       "English", "Both", " Other", " Not Available")


#This code chunk manipulates and summarized your data_file. Note you will not need to use 
#mutate(), group_by(), or spread(). select()- place the columns of interest, summarise()- summarize 
#the columns how you please, unite()- combine two columns (ex mean and sd), gather rotates your 
#table into a classic table 1 format, slice()- uses the desired_col_order you defined above to 
#reorder the rows
overall_summary <- data_file %>%
  select(col_1_age, col_2_gender, col_3_race, col_4_lang) %>%
#  mutate(pop_subset = case_when(col_1_age <= 17 ~ "Pediatric pts",
#                                col_1_age >= 18 ~ "Adults"
#                              )) %>%
#  group_by(pop_subset) %>%
  summarise(AGE = round(mean(col_1_age, na.rm=TRUE), 0),
            asd = round(sd(col_1_age, na.rm = TRUE), 1),
            Males = round(mean(col_1_age[col_2_gender == "M"], na.rm = TRUE), 0), 
            Msd = round(sd(col_1_age[col_2_gender == "M"], na.rm = TRUE), 1),
            Females = round(mean(col_1_age[col_2_gender == "F"], na.rm = TRUE), 0),
            Fsd = round(sd(col_1_age[col_2_gender =="F"], na.rm = TRUE), 1),
            Male = sum(col_2_gender == "M"),
            Female = sum(col_2_gender == "F"),
            Total = sum(col_2_gender == "F" | col_2_gender == "M"),
            Hispanic = sum(col_3_race == "Hispanic"),
            White = sum(col_3_race == "White"),
            Black = sum(col_3_race == "Black"),
            Asian = sum(col_3_race == "Asian"),
            "Native American" = sum(col_3_race == "Native American"),
            Other = sum(Race = sum(col_3_race == "Other")),
            "Not Available" = sum(col_3_race == "N/A"),
            Spanish = sum(col_4_lang == "Spanish"),
            English = sum(col_4_lang == "English"),
            Both = sum(col_4_lang == "Spanish or English"),
            " Other" = sum(col_4_lang == "sign language" | col_4_lang == "Other"),
            " Not Available" = sum(col_4_lang == "n/a" | col_4_lang == "N/A")
            
        )  %>%
  unite(col = "All (m/sd)", AGE, asd, sep= " / ") %>%
  unite(col = "Males (m/sd)", Males, Msd, sep= " / ") %>%
  unite(col = "Females (m/sd)", Females, Fsd, sep= " / ") %>%
  gather (., key = "Variable", value = "All Patients") %>%
#  spread(., key = pop_subset, value = value) %>%
  slice(match(desired_col_order, Variable)) #%>%


#print your table if you want to preview 
overall_summary

## # A tibble: 18 x 2
##    Variable         `All Patients`
##    <chr>            <chr>         
##  1 Total            760           
##  2 Male             154           
##  3 Female           606           
##  4 All (m/sd)       38 / 16.9     
##  5 Males (m/sd)     31 / 20.3     
##  6 Females (m/sd)   40 / 15.3     
##  7 Hispanic         690           
##  8 White            27            
##  9 Black            1             
## 10 Asian            1             
## 11 Native American  1             
## 12 Other            2             
## 13 Not Available    38            
## 14 Spanish          507           
## 15 English          101           
## 16 Both             36            
## 17 " Other"         8             
## 18 " Not Available" 107

Combine data frames

Here you will combine the two data frames you have made (the diff_pop_subsets data frame and the overall data frame)

tab1 <- merge(diff_pop_subsets, overall_summary, by = "Variable", sort = FALSE)

#this step reorganizes you columns
tab1Reorder <- tab1[, c(1,4,2, 3)]
write.csv(tab1Reorder, "tab1.csv")

#When you knit to PDF, this step makes a publication like table one. It looks flashy 
#and can be used for internal use. However, many journals prefer an editable version. As result, 
#you will skip this step, proceed below, and knit to word
 tab1Reorder %>%
  kable(format= "latex", caption = "Clinic Demographics", booktabs = T) %>%
  kable_styling(latex_options = "striped", full_width = F) %>%
  column_spec(1, width = "5cm") %>%
  column_spec(2, width = "2cm") %>%
  column_spec(3, width = "2cm") %>%
  group_rows("Counts", 1, 3) %>%
  group_rows("Ages", 4, 6) %>%
  group_rows("Race/Ethnicity", 7, 13) %>%
  group_rows("Language", 14, 18) %>%
  add_footnote(c("Adults is 18+", "Peds is 17 & under",
                 "(m/sd) is mean and standard deviation"), 
               notation = "symbol") %>%
  landscape()

#this step creates an editable table. Knit this to word.
tab1Reorder %>% kable()

Variable	All Patients	Adults	Pediatric pts
Total	760	638	122
Male	154	100	54
Female	606	538	68
All (m/sd)	38 / 16.9	44 / 12.5	10 / 4.8
Males (m/sd)	31 / 20.3	43 / 15.2	9 / 5.2
Females (m/sd)	40 / 15.3	44 / 11.9	11 / 4.2
Hispanic	690	582	108
White	27	19	8
Black	1	1	0
Asian	1	1	0
Native American	1	1	0
Other	2	2	0
Not Available	38	32	6
Spanish	507	445	62
English	101	76	25
Both	36	19	17
Other	8	8	0
Not Available	107	89	18

Appendix

This step is to make organize the data in the columns as mean (SD) rather than mean / SD. This is not the most efficient way to achieve this goal. Please feel free to improve code

reordertab1 <- tab1[, c(1, 4, 2, 3)]

#Now i need to add parenthesis around the SD or %. For this is must seperate, add, parenth, and then re-unite
reorg_reorder <- reordertab1 %>%
  separate(col = "All Patients", into= c("all", "all_sd"), sep = "/") %>%
  separate(col = "Adults", into= c("adults", "adults_sd"), sep = "/") %>%
  separate(col = "Pediatric pts", into= c("ped", "ped_sd"), sep = "/") 

reorg_reorder[c(4:6),3] <- paste0("(", format(unlist(reorg_reorder[c(4:6),3])),")")
reorg_reorder[c(4:6),5] <- paste0("(", format(unlist(reorg_reorder[c(4:6),5])),")")
reorg_reorder[c(4:6),7] <- paste0("(", format(unlist(reorg_reorder[c(4:6),7])),")")

reorg_reorder2 <- reorg_reorder %>%
  replace_na(list(all_sd = "", adults_sd = "", ped_sd = "")) %>%
  unite(col = "All Patients", all, all_sd , sep = "") %>%
  unite(col = "Adults", adults, adults_sd , sep = "") %>%
  unite(col = "Pediatric pts", ped, ped_sd , sep = "")

Table One

Cyrus Elahi

Jan 17, 2018

Set up your R environment

Load in your data file

Table Construction

Subgroups of data

Overall data

Combine data frames

Appendix