Explore data from the National Electronic Injury Surveillance System (NEISS), collected by the Consumer Product Safety Commission. This is a long-term study that records all accidents seen in a representative sample of hospitals in the United States.
suppressPackageStartupMessages({
library(tidyverse)
library(ggplot2)
library(dplyr)
library(plotly)
library(vroom)
library(lubridate)
})
Download data from website
Read data from local directory
injuries <- vroom::vroom("/Users/nnthieu/neiss/injuries.tsv.gz")
## Rows: 255064 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): sex, race, body_part, diag, location, narrative
## dbl (3): age, prod_code, weight
## date (1): trmt_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(names(injuries))
## [1] "trmt_date" "age" "sex" "race" "body_part" "diag"
## [7] "location" "prod_code" "weight" "narrative"
head(injuries)
## # A tibble: 6 × 10
## trmt_date age sex race body_part diag location prod_code weight
## <date> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 2017-01-01 71 male white Upper Trunk Contusion … Other P… 1807 77.7
## 2 2017-01-01 16 male white Lower Arm Burns, The… Home 676 77.7
## 3 2017-01-01 58 male white Upper Trunk Contusion … Home 649 77.7
## 4 2017-01-01 21 male white Lower Trunk Strain, Sp… Home 4076 77.7
## 5 2017-01-01 54 male white Head Inter Orga… Other P… 1807 77.7
## 6 2017-01-01 21 male white Hand Fracture Home 1884 77.7
## # ℹ 1 more variable: narrative <chr>
Each row represents a single accident with 10 variables:
‘trmt_date’ is date the person was seen in the hospital (not when the accident occurred).
age, sex, and race give demographic information about the person who experienced the accident.
‘body_part’ is the location of the injury on the body (like ankle or ear); location is the place where the accident occurred (like home or school).
‘diag’ gives the basic diagnosis of the injury (like fracture or laceration).
‘prod_code’ is the primary product associated with the injury.
‘weight’ is statistical weight giving the estimated number of people who would suffer this injury if this dataset was scaled to the entire population of the US.
‘narrative’ is a brief story about how the accident occurred.
injuries_by_weekday <- injuries |>
mutate(
day = wday(trmt_date, label = TRUE, abbr = FALSE)
) |>
count(day, name = "case_count") |>
mutate(
percent = round(100 * case_count / sum(case_count), 1)
) |>
arrange(day)
print(injuries_by_weekday)
## # A tibble: 7 × 3
## day case_count percent
## <ord> <int> <dbl>
## 1 Sunday 37916 14.9
## 2 Monday 37850 14.8
## 3 Tuesday 36172 14.2
## 4 Wednesday 36241 14.2
## 5 Thursday 35549 13.9
## 6 Friday 34852 13.7
## 7 Saturday 36484 14.3
Not different in numbers of injuries by week days
injuries_by_bodypart <- injuries |>
group_by(body_part) |>
summarise(
case_count = n(),
.groups = "drop"
) |>
mutate(
percent = round(100 * case_count / sum(case_count), 1)
) |>
arrange(desc(case_count))
print(injuries_by_bodypart)
## # A tibble: 26 × 3
## body_part case_count percent
## <chr> <int> <dbl>
## 1 Head 46522 18.2
## 2 Lower Trunk 23000 9
## 3 Face 22017 8.6
## 4 Finger 18350 7.2
## 5 Ankle 16895 6.6
## 6 Knee 15175 5.9
## 7 Upper Trunk 14513 5.7
## 8 Foot 11188 4.4
## 9 Shoulder 10448 4.1
## 10 Hand 10079 4
## # ℹ 16 more rows
The most body parts having injuries are head, lower trunk, face, finger, and ankle.
injuries_by_bodypart_sex <- injuries |>
group_by(sex, body_part) |>
summarise(
case_count = n(),
.groups = "drop_last"
) |>
mutate(
percent = round(100 * case_count / sum(case_count), 1)
) |>
arrange(sex, desc(case_count)) |>
group_by(sex) |>
slice_head(n = 5)
print(injuries_by_bodypart_sex)
## # A tibble: 10 × 4
## # Groups: sex [2]
## sex body_part case_count percent
## <chr> <chr> <int> <dbl>
## 1 female Head 21753 18.4
## 2 female Lower Trunk 12876 10.9
## 3 female Face 8997 7.6
## 4 female Ankle 8607 7.3
## 5 female Knee 7431 6.3
## 6 male Head 24769 18.1
## 7 male Face 13020 9.5
## 8 male Finger 11066 8.1
## 9 male Lower Trunk 10124 7.4
## 10 male Ankle 8288 6
# weighted for weight
selected <- injuries %>% filter(prod_code == 649) # pro_code for "toilets"
selected %>% count(location, name = "case_counts", wt = weight, sort = TRUE)
## # A tibble: 6 × 2
## location case_counts
## <chr> <dbl>
## 1 Home 99603.
## 2 Other Public Property 18663.
## 3 Unknown 16267.
## 4 School 659.
## 5 Street Or Highway 16.2
## 6 Sports Or Recreation Place 14.8
Not weighted for weight
# not weighted for weight
selected_by_location <- selected |>
count(location, name = "case_count") |>
mutate(
percent = round(100 * case_count / sum(case_count), 1)
) |>
arrange(desc(case_count))
print(selected_by_location)
## # A tibble: 6 × 3
## location case_count percent
## <chr> <int> <dbl>
## 1 Home 2142 71.6
## 2 Unknown 431 14.4
## 3 Other Public Property 388 13
## 4 School 30 1
## 5 Sports Or Recreation Place 1 0
## 6 Street Or Highway 1 0
# not weighted for weight
selected_by_bodypart <- selected |>
count(body_part, name = "case_count") |>
mutate(
percent = round(100 * case_count / sum(case_count), 1)
) |>
arrange(desc(case_count))
print(selected_by_bodypart)
## # A tibble: 24 × 3
## body_part case_count percent
## <chr> <int> <dbl>
## 1 Head 756 25.3
## 2 Lower Trunk 555 18.5
## 3 Face 295 9.9
## 4 Upper Trunk 259 8.7
## 5 Knee 144 4.8
## 6 N.S./Unk 135 4.5
## 7 Lower Leg 107 3.6
## 8 All Of Body 75 2.5
## 9 Pubic Region 73 2.4
## 10 Ankle 71 2.4
## # ℹ 14 more rows
summary <- selected %>%
count(age, sex, wt = weight)
summary %>%
ggplot(aes(age, n, colour = sex)) +
geom_line() +
labs(y = "Estimated number of injuries")
population <- read_tsv("/Users/nnthieu/neiss/population.tsv")
## Rows: 170 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): sex
## dbl (2): age, population
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summary <- selected %>%
count(age, sex, wt = weight) %>%
left_join(population, by = c("age", "sex")) %>%
mutate(rate = n / population * 1e4)
summary %>%
ggplot(aes(age, rate, colour = sex)) +
geom_line(na.rm = TRUE) +
labs(y = "Injuries per 10,000 people")
selected %>%
sample_n(10) %>%
pull(narrative)
## [1] "41-YOM S/P SEVERAL ALCOHOLIC BEVERAGES, FELL OFF TOILET, STRIKING HEAD& CHIN. DX: SYNCOPE & COLLAPSE, CHIN LACERATION, FOREHEAD CONTUSION"
## [2] "90 YOF C/O PAIN RIGHT THIGH, FELL WHILE ATTEMPTING TO PULL PANTS BACKUP WHILE ON THE TOILET, DX: THIGH PAIN"
## [3] "91YOF WITH CHI FROM FALL OFF TOILET"
## [4] "91YOF AT THE NURSING HOME AND WENT TO SIT ON THE TOILET AND MISSED IT FELL- TO THE ED FOR FALL EVALUATION NO INJURY ADMITTED FOR WEAKNESS"
## [5] "96YF H/O B/L HIP&KNEE REPLACEMENT, GETTING OFF OF THE COMMODE LEGS GAVEOUT&FELL>>FOOT FX"
## [6] "52 YOM LAC TO FINGER WHEN WORKING ON THE TOILET"
## [7] "A 82YOM FELL ASLEEP WHILE ON TOILET, SUSTAINED HEMATOMA TO EYE"
## [8] "KNEE PAIN. 81YOF PAIN IN BOTH KNEES WHEN FELL OFF TOILET ONTO THE BATHROOM FLOOR AT HOME."
## [9] "89 YOM FELL WHILE GETTING UP FROM TOILET AT ALF.DX: HIP PX, HEMIPLEGIA, OSTEOARTHRITIS HIP."
## [10] "LAC.HEAD.88YOF.SITTING ON TOILET LOST BALANCE FALLING DOWN"