Introduction

This analysis summarizes R's built-in Longley dataset (16 annual observations of economic indicators) with descriptive tables produced by the gtsummary package.

Loading Packages and Data

# Load required libraries
library(gtsummary)
library(dplyr)

# Bring the Longley data into the workspace and preview its first six rows
data(longley)
head(longley, n = 6)
##      GNP.deflator     GNP Unemployed Armed.Forces Population Year Employed
## 1947         83.0 234.289      235.6        159.0    107.608 1947   60.323
## 1948         88.5 259.426      232.5        145.6    108.632 1948   61.122
## 1949         88.2 258.054      368.2        161.6    109.773 1949   60.171
## 1950         89.5 284.599      335.1        165.0    110.929 1950   61.187
## 1951         96.2 328.975      209.9        309.9    112.075 1951   63.221
## 1952         98.1 346.999      193.2        359.4    113.270 1952   63.639

Data Preparation

# Derive a three-level period factor from Year.
# cut() with breaks = 3 splits the observed Year range into three
# equal-width intervals, relabelled here as Group 1 to Group 3.
df <- within(longley, {
  y_group <- cut(
    Year,
    breaks = 3,
    labels = c("Group 1", "Group 2", "Group 3")
  )
})

# Confirm that the new y_group column was appended
head(df)
##      GNP.deflator     GNP Unemployed Armed.Forces Population Year Employed
## 1947         83.0 234.289      235.6        159.0    107.608 1947   60.323
## 1948         88.5 259.426      232.5        145.6    108.632 1948   61.122
## 1949         88.2 258.054      368.2        161.6    109.773 1949   60.171
## 1950         89.5 284.599      335.1        165.0    110.929 1950   61.187
## 1951         96.2 328.975      209.9        309.9    112.075 1951   63.221
## 1952         98.1 346.999      193.2        359.4    113.270 1952   63.639
##      y_group
## 1947 Group 1
## 1948 Group 1
## 1949 Group 1
## 1950 Group 1
## 1951 Group 1
## 1952 Group 1

Summary Statistics Without Grouping

# Overall table: each listed variable summarised across all rows of df.
# NOTE(review): Unemployed is left out of the table -- confirm this is intended.
tbl_summary(
  df,
  include = c(GNP.deflator, GNP, Armed.Forces, Population, Year, Employed)
)
| Characteristic | N = 16¹ |
|---|---|
| GNP.deflator | 101 (93, 112) |
| GNP | 381 (307, 464) |
| Armed.Forces | 272 (208, 307) |
| Population | 117 (112, 123) |
| Year | 1,955 (1,951, 1,959) |
| Employed | 65.50 (62.20, 68.41) |

¹ Median (Q1, Q3)

Summary Statistics by Year Group

# Stratified table: the same variables, one summary column per y_group level.
# NOTE(review): Unemployed is left out of the table -- confirm this is intended.
df %>%
  tbl_summary(
    by = y_group,
    include = c(GNP.deflator, GNP, Armed.Forces, Population, Year, Employed)
  )
| Characteristic | Group 1, N = 6¹ | Group 2, N = 5¹ | Group 3, N = 5¹ |
|---|---|---|---|
| GNP.deflator | 89 (88, 96) | 101 (100, 105) | 114 (113, 116) |
| GNP | 272 (258, 329) | 397 (365, 419) | 503 (483, 518) |
| Armed.Forces | 163 (159, 310) | 305 (286, 335) | 257 (255, 264) |
| Population | 110 (109, 112) | 117 (116, 119) | 125 (123, 128) |
| Year | 1,950 (1,948, 1,951) | 1,955 (1,954, 1,956) | 1,960 (1,959, 1,961) |
| Employed | 61.15 (60.32, 63.22) | 66.02 (64.99, 67.86) | 69.33 (68.66, 69.56) |

¹ Median (Q1, Q3)

Summary with P-values

# Stratified table plus a p-value column comparing the y_group levels
# (the rendered footnote reports a Kruskal-Wallis rank sum test).
# NOTE(review): Unemployed is left out of the table -- confirm this is intended.
df %>%
  tbl_summary(
    by = y_group,
    include = c(GNP.deflator, GNP, Armed.Forces, Population, Year, Employed)
  ) %>%
  add_p()
| Characteristic | Group 1, N = 6¹ | Group 2, N = 5¹ | Group 3, N = 5¹ | p-value² |
|---|---|---|---|---|
| GNP.deflator | 89 (88, 96) | 101 (100, 105) | 114 (113, 116) | 0.001 |
| GNP | 272 (258, 329) | 397 (365, 419) | 503 (483, 518) | 0.001 |
| Armed.Forces | 163 (159, 310) | 305 (286, 335) | 257 (255, 264) | 0.11 |
| Population | 110 (109, 112) | 117 (116, 119) | 125 (123, 128) | 0.001 |
| Year | 1,950 (1,948, 1,951) | 1,955 (1,954, 1,956) | 1,960 (1,959, 1,961) | 0.001 |
| Employed | 61.15 (60.32, 63.22) | 66.02 (64.99, 67.86) | 69.33 (68.66, 69.56) | 0.002 |

¹ Median (Q1, Q3)
² Kruskal-Wallis rank sum test