Introduction

Explore data from the National Electronic Injury Surveillance System (NEISS), collected by the Consumer Product Safety Commission. This is a long-term study that records all accidents seen in a representative sample of hospitals in the United States.

Load data

suppressPackageStartupMessages({
library(tidyverse)
library(ggplot2)
library(dplyr)
library(plotly)
library(vroom)
library(lubridate)
})

Download data from website

Read data from local directory

injuries <- vroom::vroom("/Users/nnthieu/neiss/injuries.tsv.gz")
## Rows: 255064 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (6): sex, race, body_part, diag, location, narrative
## dbl  (3): age, prod_code, weight
## date (1): trmt_date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(names(injuries))
##  [1] "trmt_date" "age"       "sex"       "race"      "body_part" "diag"     
##  [7] "location"  "prod_code" "weight"    "narrative"
head(injuries)
## # A tibble: 6 × 10
##   trmt_date    age sex   race  body_part   diag        location prod_code weight
##   <date>     <dbl> <chr> <chr> <chr>       <chr>       <chr>        <dbl>  <dbl>
## 1 2017-01-01    71 male  white Upper Trunk Contusion … Other P…      1807   77.7
## 2 2017-01-01    16 male  white Lower Arm   Burns, The… Home           676   77.7
## 3 2017-01-01    58 male  white Upper Trunk Contusion … Home           649   77.7
## 4 2017-01-01    21 male  white Lower Trunk Strain, Sp… Home          4076   77.7
## 5 2017-01-01    54 male  white Head        Inter Orga… Other P…      1807   77.7
## 6 2017-01-01    21 male  white Hand        Fracture    Home          1884   77.7
## # ℹ 1 more variable: narrative <chr>

Each row represents a single accident with 10 variables:

Exploration

Injuries by week day

injuries_by_weekday <- injuries |>
  mutate(
    day = wday(trmt_date, label = TRUE, abbr = FALSE)
  ) |>
  count(day, name = "case_count") |>
  mutate(
    percent = round(100 * case_count / sum(case_count), 1)
  ) |>
  arrange(day)

print(injuries_by_weekday)
## # A tibble: 7 × 3
##   day       case_count percent
##   <ord>          <int>   <dbl>
## 1 Sunday         37916    14.9
## 2 Monday         37850    14.8
## 3 Tuesday        36172    14.2
## 4 Wednesday      36241    14.2
## 5 Thursday       35549    13.9
## 6 Friday         34852    13.7
## 7 Saturday       36484    14.3

Not different in numbers of injuries by week days

Injuries by body parts

injuries_by_bodypart <- injuries |>
  group_by(body_part) |>
  summarise(
    case_count = n(),
    .groups = "drop"
  ) |>
  mutate(
    percent = round(100 * case_count / sum(case_count), 1)
  ) |>
  arrange(desc(case_count))

print(injuries_by_bodypart)
## # A tibble: 26 × 3
##    body_part   case_count percent
##    <chr>            <int>   <dbl>
##  1 Head             46522    18.2
##  2 Lower Trunk      23000     9  
##  3 Face             22017     8.6
##  4 Finger           18350     7.2
##  5 Ankle            16895     6.6
##  6 Knee             15175     5.9
##  7 Upper Trunk      14513     5.7
##  8 Foot             11188     4.4
##  9 Shoulder         10448     4.1
## 10 Hand             10079     4  
## # ℹ 16 more rows

The most body parts having injuries are head, lower trunk, face, finger, and ankle.

Injuries by sex and body parts

injuries_by_bodypart_sex <- injuries |>
  group_by(sex, body_part) |>
  summarise(
    case_count = n(),
    .groups = "drop_last"
  ) |>
  mutate(
    percent = round(100 * case_count / sum(case_count), 1)
  ) |>
  arrange(sex, desc(case_count)) |>
  group_by(sex) |>
  slice_head(n = 5)

print(injuries_by_bodypart_sex)
## # A tibble: 10 × 4
## # Groups:   sex [2]
##    sex    body_part   case_count percent
##    <chr>  <chr>            <int>   <dbl>
##  1 female Head             21753    18.4
##  2 female Lower Trunk      12876    10.9
##  3 female Face              8997     7.6
##  4 female Ankle             8607     7.3
##  5 female Knee              7431     6.3
##  6 male   Head             24769    18.1
##  7 male   Face             13020     9.5
##  8 male   Finger           11066     8.1
##  9 male   Lower Trunk      10124     7.4
## 10 male   Ankle             8288     6

Patients who injured in toilets

Injuries by location weighted for “weight”

# weighted for weight
selected <- injuries %>% filter(prod_code == 649) # pro_code for "toilets"
selected %>% count(location, name = "case_counts", wt = weight, sort = TRUE)
## # A tibble: 6 × 2
##   location                   case_counts
##   <chr>                            <dbl>
## 1 Home                           99603. 
## 2 Other Public Property          18663. 
## 3 Unknown                        16267. 
## 4 School                           659. 
## 5 Street Or Highway                 16.2
## 6 Sports Or Recreation Place        14.8

Not weighted for weight

# not weighted for weight
selected_by_location <- selected |>
  count(location, name = "case_count") |>
  mutate(
    percent = round(100 * case_count / sum(case_count), 1)
  ) |>
  arrange(desc(case_count))

print(selected_by_location)
## # A tibble: 6 × 3
##   location                   case_count percent
##   <chr>                           <int>   <dbl>
## 1 Home                             2142    71.6
## 2 Unknown                           431    14.4
## 3 Other Public Property             388    13  
## 4 School                             30     1  
## 5 Sports Or Recreation Place          1     0  
## 6 Street Or Highway                   1     0

Injuries by body parts

# not weighted for weight
selected_by_bodypart <- selected |>
  count(body_part, name = "case_count") |>
  mutate(
    percent = round(100 * case_count / sum(case_count), 1)
  ) |>
  arrange(desc(case_count))

print(selected_by_bodypart)
## # A tibble: 24 × 3
##    body_part    case_count percent
##    <chr>             <int>   <dbl>
##  1 Head                756    25.3
##  2 Lower Trunk         555    18.5
##  3 Face                295     9.9
##  4 Upper Trunk         259     8.7
##  5 Knee                144     4.8
##  6 N.S./Unk            135     4.5
##  7 Lower Leg           107     3.6
##  8 All Of Body          75     2.5
##  9 Pubic Region         73     2.4
## 10 Ankle                71     2.4
## # ℹ 14 more rows
summary <- selected %>% 
  count(age, sex, wt = weight)
summary %>% 
  ggplot(aes(age, n, colour = sex)) + 
  geom_line() + 
  labs(y = "Estimated number of injuries")

Injury incidence by sex

population <- read_tsv("/Users/nnthieu/neiss/population.tsv")
## Rows: 170 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): sex
## dbl (2): age, population
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summary <- selected %>% 
  count(age, sex, wt = weight) %>% 
  left_join(population, by = c("age", "sex")) %>% 
  mutate(rate = n / population * 1e4)

summary %>% 
  ggplot(aes(age, rate, colour = sex)) + 
  geom_line(na.rm = TRUE) + 
  labs(y = "Injuries per 10,000 people")

Sample narratives

selected %>% 
  sample_n(10) %>% 
  pull(narrative)
##  [1] "41-YOM S/P SEVERAL ALCOHOLIC BEVERAGES, FELL OFF TOILET, STRIKING HEAD& CHIN.  DX:  SYNCOPE & COLLAPSE, CHIN LACERATION, FOREHEAD CONTUSION"
##  [2] "90 YOF C/O PAIN RIGHT THIGH, FELL WHILE ATTEMPTING TO PULL PANTS BACKUP WHILE ON THE TOILET, DX: THIGH PAIN"                                
##  [3] "91YOF WITH  CHI FROM FALL OFF TOILET"                                                                                                       
##  [4] "91YOF AT THE NURSING HOME AND WENT TO SIT ON THE TOILET AND MISSED IT FELL- TO THE ED FOR FALL EVALUATION NO INJURY ADMITTED FOR WEAKNESS"  
##  [5] "96YF H/O B/L HIP&KNEE REPLACEMENT, GETTING OFF OF THE COMMODE LEGS GAVEOUT&FELL>>FOOT FX"                                                   
##  [6] "52 YOM LAC TO FINGER WHEN WORKING ON THE TOILET"                                                                                            
##  [7] "A 82YOM FELL ASLEEP WHILE ON TOILET, SUSTAINED HEMATOMA TO EYE"                                                                             
##  [8] "KNEE PAIN.  81YOF PAIN IN BOTH KNEES WHEN FELL OFF TOILET ONTO THE BATHROOM FLOOR AT HOME."                                                 
##  [9] "89 YOM FELL WHILE GETTING UP  FROM TOILET AT ALF.DX:  HIP PX, HEMIPLEGIA, OSTEOARTHRITIS HIP."                                              
## [10] "LAC.HEAD.88YOF.SITTING ON TOILET LOST BALANCE FALLING DOWN"