## Main Source: https://juliasilge.com/blog/your-floor/
library(neiss)
data(injuries)
data(products)

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(reshape2)


prod_bike <- subset(products, code==5040|code==5033|code==1202)
prod_bike
## # A tibble: 3 x 2
##    code                                   title
##   <int>                                   <chr>
## 1  1202                 bicycles or accessories
## 2  5033 mountain or all-terrain bicycles and ac
## 3  5040 bicycles and accessories (excl mountain
injuries_bike <- subset(injuries, prod1==1202| prod1==5040| prod1==5033)
dim(injuries_bike)
## [1] 90790    18
head(injuries_bike)
## # A tibble: 6 x 18
##   case_num  trmt_date   psu  weight stratum   age   sex               race
##      <int>     <date> <dbl>   <dbl>   <chr> <dbl> <chr>              <chr>
## 1 90105226 2009-01-01    73 70.8387       S    71  Male        None listed
## 2 90105248 2009-01-01     2 15.3491       V    53  Male              White
## 3 90105251 2009-01-01     2 15.3491       V     5  Male              White
## 4 90105715 2009-01-03    53 15.3491       V    42  Male              White
## 5 90106176 2009-01-03    61 15.3491       V    30  Male Other / Mixed Race
## 6 90108746 2009-01-01    95 15.3491       V    19  Male              White
## # ... with 10 more variables: race_other <chr>, diag <chr>,
## #   diag_other <chr>, body_part <chr>, disposition <chr>, location <chr>,
## #   fmv <chr>, prod1 <dbl>, prod2 <dbl>, narrative <chr>
library(ggplot2)
injuries_bike$location <- as.factor(injuries_bike$location)
whereinjury <- injuries_bike %>% group_by(location) %>% summarise(total = sum(weight))
ggplot(data = whereinjury, 
       aes(x = location, y = total)) +
        geom_bar(stat = "identity", fill = "red", alpha = 0.8)+theme_bw()+
        theme(legend.position="none", axis.title.x = element_blank(),
              axis.text.x= element_text(angle=45, hjust = 1)) +
        ylab("Estimated number of Bicycle injuries") +
        ggtitle("Location of Bicycle Injuries")

injuries_bike$sex <- as.factor(injuries_bike$sex)
whereinjury <- injuries_bike %>% group_by(location, sex) %>% summarise(total = sum(weight)) %>% 
        arrange(desc(total))
ggplot(data = whereinjury[whereinjury$sex != "None listed",], 
       aes(x = location, y = total, fill = sex)) +
        geom_bar(alpha = 0.8, stat = "identity", position = "dodge") + theme_bw()+
        scale_fill_manual(values = c("blue", "red")) + 
        theme(axis.title.x = element_blank(), legend.title=element_blank(),
              axis.text.x= element_text(angle=45, hjust = 1)) +
        ylab("Estimated number of injuries") +
        ggtitle("Location of Injuries")

sexageinjury <- injuries_bike %>% 
  group_by(sex, age = as.numeric(cut(age, breaks = (seq(0,100, by = 1))))-1) %>%
  summarise(total = sum(weight))
sexageinjury
## # A tibble: 195 x 3
## # Groups:   sex [?]
##       sex   age      total
##    <fctr> <dbl>      <dbl>
##  1 Female     0   699.1051
##  2 Female     1 14922.6936
##  3 Female     2 15081.4929
##  4 Female     3 21100.5816
##  5 Female     4 27460.5314
##  6 Female     5 33947.3199
##  7 Female     6 42851.4507
##  8 Female     7 44605.9621
##  9 Female     8 49560.7162
## 10 Female     9 47612.4176
## # ... with 185 more rows
medianpop <- population %>% filter(year >= 2009) %>% group_by(age, sex) %>% 
  summarise(n = median(n))

totalinjuries <- left_join(medianpop, sexageinjury, by = c("age" = "age"))
totalinjuries <- totalinjuries %>% filter(sex.x == tolower(sex.y)) %>% 
  select(age, sex = sex.x, population = n, injuries = total) 
totalinjuries 
## # A tibble: 170 x 4
## # Groups:   age [85]
##      age    sex population   injuries
##    <dbl>  <chr>      <int>      <dbl>
##  1     0 female    1932910   699.1051
##  2     0   male    2018420   623.3854
##  3     1 female    1938870 14922.6936
##  4     1   male    2023253 26739.2394
##  5     2 female    1950168 15081.4929
##  6     2   male    2042522 32020.4558
##  7     3 female    1969393 21100.5816
##  8     3   male    2056282 45991.8549
##  9     4 female    1992143 27460.5314
## 10     4   male    2083989 51646.2414
## # ... with 160 more rows
toiletinjury <- injuries_bike %>% 
        group_by(sex, age = as.numeric(cut(age, breaks = (seq(0,100, by = 1))))-1) %>%
        summarise(total = sum(weight))
totalinjuries <- left_join(medianpop, toiletinjury, by = c("age" = "age"))
totalinjuries <- totalinjuries %>% filter(sex.x == tolower(sex.y)) %>% 
        select(age, sex = sex.x, population = n, injuries = total) %>%
        mutate(rate = injuries/population*1e5) %>% 
        melt(id = c("age", "sex"), measure = c("injuries", "rate"))
levels(totalinjuries$variable) <- c("Estimated Number of Injuries", "Injury Rate per 100,000 Population")
ggplot(data = totalinjuries, 
       aes(x = age, y = value, color = sex)) +
        facet_wrap(~variable, ncol = 1, scales = "free_y") + 
        geom_line(size = 1.2, alpha = 0.9) +theme_bw()+
        scale_color_manual(values = c("blue", "red")) + 
        theme(legend.title=element_blank(), legend.justification=c(0,0.38), 
              legend.position=c(0,0.38)) +
        ylab("Number") + xlab("Age") + 
        ggtitle("Bicycle Related Injuries by Age and Sex")

table(injuries_bike$location)
## 
##                       Farm                       Home 
##                         16                      12668 
##           Industrial Place                Mobile Home 
##                          9                          1 
##      Other Public Property                     School 
##                       5363                        380 
## Sports Or Recreation Place          Street Or Highway 
##                       4478                      35017 
##                    Unknown 
##                      32858
whereinjury <- injuries_bike %>% group_by(location, sex, 
                                     age = as.numeric(cut(age, 
                                                          breaks = (seq(0,100, by = 1))))-1) %>%
        summarise(total = sum(weight))
whereinjury$location = factor(whereinjury$location,levels(whereinjury$location)[c(2,7,6,5,1,3,4,8,9)])
plotlocation <- c("Home", "Other Public Property",  "Sports Or Recreation Place",
                  "Street Or Highway")
ggplot(data = whereinjury[whereinjury$sex != "None listed" & 
                                  whereinjury$location %in% plotlocation,], 
       aes(x = age, y = total, color = sex)) +
        facet_wrap(~location, ncol = 1, scales = "free_y") + theme_bw()+
        geom_line(size = 1.2, alpha = 0.9) +
        scale_color_manual(values = c("blue", "red")) + 
        theme(legend.title=element_blank(), legend.justification=c(1,1), legend.position=c(1,1)) +
        ylab("Estimated Number of Bicycle injuries") + xlab("Age") + 
        ggtitle("Bicycle Injuries by Location, Age, and Sex")
## Warning: Removed 1 rows containing missing values (geom_path).