Basic Setup
Set up libraries
library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
#library(ggmap)
Read Data and Select Relevant Columns
# Load the accidental drug-related deaths extract (2012-2016, per the file
# name) from its local CSV copy.
AllAbuseData <- read.csv(
  "C:/MSDA/D607/Project2/DrugFatalitiesCT/Accidental_Drug_Related_Deaths__2012-2016.csv"
)
# Preview the first six records to confirm the file parsed as expected.
head(AllAbuseData, n = 6L)
## CaseNumber Date Sex Race Age Residence.City Residence.State
## 1 Dec-43 3/23/2012 Male White 22 ONECO
## 2 Dec-08 2/21/2012 Male White 28 WINCHESTER
## 3 5/20/2016 Male White 36 AVON CT
## 4 15-13536 8/20/2015 Male Black 63 NEW HAVEN CT
## 5 12-12217 8/29/2012 Male White 39 MIDDLETOWN
## 6 15-6342 4/11/2015 Male White 46 NEWTOWN CT
## Residence.County Death.City Death.State Death.County Location
## 1 WINDHAM MOOSUP WINDHAM Other
## 2 LITCHFIELD WINCHESTER LITCHFIELD
## 3 HARTFORD AVON CT HARTFORD Residence
## 4 NEW HAVEN NEW HAVEN CT NEW HAVEN Residence
## 5 MIDDLESEX MIDDLETOWN MIDDLESEX Residence
## 6 FAIRFIELD NEWTOWN CT FAIRFIELD Residence
## DescriptionofInjury InjuryPlace
## 1 Other
## 2 Other
## 3 Residence
## 4 Residence
## 5 Residence
## 6 Residence
## ImmediateCauseA
## 1 Heroin Toxicity
## 2 Oxycodone Toxicity
## 3 Acute Oxycodone Intoxication
## 4 Liver Failure due to cirrhosis due to chronic hepatitis C (OSC Heroin)
## 5 Opiate Toxicity
## 6 Intoxication due to the Combined Effects of Diazepam, Temazepam, and Fentanyl
## Heroin Cocaine Fentanyl Oxycodone Oxymorphone EtOH Hydro.codeine
## 1 Y
## 2 Y
## 3 Y
## 4 Y
## 5 Y Y Y
## 6 Y
## Benzodiazepine Methadone Amphet Tramad Morphine..not.heroin. Other
## 1
## 2
## 3
## 4
## 5
## 6 Y
## Any.Opioid MannerofDeath AmendedMannerofDeath
## 1 Accident NA
## 2 Accident NA
## 3 Accident NA
## 4 Y Accident NA
## 5 Accident NA
## 6 Y Accident NA
## DeathLoc
## 1 Moosup, CT\n(41.712872, -71.881207)
## 2 Winchester, CT\n(41.901973, -73.133851)
## 3 Avon, CT\n(41.809641, -72.830547)
## 4 New Haven, CT\n(41.308252, -72.924161)
## 5 Middletown, CT\n(41.544654, -72.651713)
## 6 Newtown, CT\n(41.413516, -73.308842)
# List every column name; the per-drug indicator columns run from Heroin
# through Other.
colnames(AllAbuseData)
## [1] "CaseNumber" "Date"
## [3] "Sex" "Race"
## [5] "Age" "Residence.City"
## [7] "Residence.State" "Residence.County"
## [9] "Death.City" "Death.State"
## [11] "Death.County" "Location"
## [13] "DescriptionofInjury" "InjuryPlace"
## [15] "ImmediateCauseA" "Heroin"
## [17] "Cocaine" "Fentanyl"
## [19] "Oxycodone" "Oxymorphone"
## [21] "EtOH" "Hydro.codeine"
## [23] "Benzodiazepine" "Methadone"
## [25] "Amphet" "Tramad"
## [27] "Morphine..not.heroin." "Other"
## [29] "Any.Opioid" "MannerofDeath"
## [31] "AmendedMannerofDeath" "DeathLoc"
Tidy And Analyse the data for Drug cause
# Keep the case identifier plus the per-drug indicator columns
# (Heroin through Other).
DeathCauseTidy <- select(AllAbuseData, CaseNumber, Heroin:Other)
# read.csv() may have produced factor columns whose level sets differ from one
# drug column to the next; gather() then warns and drops those attributes.
# Coerce every column to character first so the reshape is explicit and
# warning-free. This is a no-op when the columns are already character.
DeathCauseTidy[] <- lapply(DeathCauseTidy, as.character)
# Reshape to long form (one row per case and drug column), then keep only the
# rows where the indicator is non-empty. filter() also discards NA entries.
DeathCauseTidy <- DeathCauseTidy %>%
  gather(DrugName, Present, Heroin:Other) %>%
  filter(Present != "") %>%
  select(CaseNumber, DrugName)
# Count cases per drug. The grouping is applied inside this pipeline only, so
# DeathCauseTidy itself is not left as a grouped data frame.
DeathCauseDrugCount <- DeathCauseTidy %>%
  group_by(DrugName) %>%
  summarise(count = n())
# Show the drugs from most to least frequently recorded.
DeathCauseDrugCount %>% arrange(desc(count))
## # A tibble: 13 × 2
## DrugName count
## <chr> <int>
## 1 Heroin 1743
## 2 Cocaine 844
## 3 Fentanyl 800
## 4 Benzodiazepine 752
## 5 EtOH 700
## 6 Oxycodone 455
## 7 Other 323
## 8 Methadone 288
## 9 Hydro.codeine 91
## 10 Oxymorphone 88
## 11 Amphet 64
## 12 Tramad 62
## 13 Morphine..not.heroin. 35
Heroin is the drug most frequently involved in these deaths (1,743 cases), followed by Cocaine (844) and Fentanyl (800).
Tidy And Analyse the data for Race Background
# Narrow the data to the case identifier and the demographic columns.
BackgroundTidy <- AllAbuseData %>%
  select(CaseNumber, Sex, Race, Age)
# Preview the first six rows of the reduced table.
head(BackgroundTidy, n = 6L)
## CaseNumber Sex Race Age
## 1 Dec-43 Male White 22
## 2 Dec-08 Male White 28
## 3 Male White 36
## 4 15-13536 Male Black 63
## 5 12-12217 Male White 39
## 6 15-6342 Male White 46
# Split the Race column at the comma into a primary and a secondary race.
#  - sep = ",\\s*" also consumes any whitespace after the comma, so
#    SecondaryRace does not start with a stray space.
#  - fill = "right" declares that single-race rows are expected: their
#    SecondaryRace becomes NA without the "Too few values" warning.
RaceTidy <- BackgroundTidy %>%
  separate(
    Race,
    into = c("PrimaryRace", "SecondaryRace"),
    sep = ",\\s*",
    fill = "right"
  )
head(RaceTidy)
## CaseNumber Sex PrimaryRace SecondaryRace Age
## 1 Dec-43 Male White <NA> 22
## 2 Dec-08 Male White <NA> 28
## 3 Male White <NA> 36
## 4 15-13536 Male Black <NA> 63
## 5 12-12217 Male White <NA> 39
## 6 15-6342 Male White <NA> 46
# Count cases by primary race.
# Whitespace is trimmed BEFORE grouping so that values differing only by
# leading/trailing spaces are counted together; trimming after aggregation
# could leave several rows with the same Race label.
PrimaryRaceCount <- RaceTidy %>%
  mutate(Race = trimws(PrimaryRace)) %>%
  group_by(Race) %>%
  summarise(count = n())
head(PrimaryRaceCount)
## # A tibble: 6 × 2
## Race count
## <chr> <int>
## 1 9
## 2 Asian 11
## 3 Asian Indian 3
## 4 Black 241
## 5 Chinese 2
## 6 Hawaiian 1
# Count cases by secondary race.
# Rows with no secondary race (NA) are excluded so they do not form a group of
# their own, and whitespace is trimmed before grouping so equal labels are
# counted together.
SecRaceCount <- RaceTidy %>%
  filter(!is.na(SecondaryRace)) %>%
  mutate(Race = trimws(SecondaryRace)) %>%
  group_by(Race) %>%
  summarise(count = n())
# Combine primary (count.x) and secondary (count.y) counts per race.
# A full join keeps races that occur only as a secondary race; a left join
# from the primary counts would silently drop them.
AllRaceCount <- full_join(x = PrimaryRaceCount, y = SecRaceCount, by = "Race")
# A race missing on one side has a count of zero there; 0L keeps the columns
# integer.
AllRaceCount$count.x[is.na(AllRaceCount$count.x)] <- 0L
AllRaceCount$count.y[is.na(AllRaceCount$count.y)] <- 0L
head(AllRaceCount)
## # A tibble: 6 × 3
## Race count.x count.y
## <chr> <int> <dbl>
## 1 9 0
## 2 Asian 11 0
## 3 Asian Indian 3 0
## 4 Black 241 9
## 5 Chinese 2 0
## 6 Hawaiian 1 0
# Add the primary and secondary counts to get one combined figure per race.
AllRaceCount <- mutate(AllRaceCount, total = count.x + count.y)
# Display the races ordered from the smallest to the largest combined count.
arrange(AllRaceCount, total)
## # A tibble: 11 × 4
## Race count.x count.y total
## <chr> <int> <dbl> <dbl>
## 1 Hawaiian 1 0 1
## 2 Native American 1 0 1
## 3 Chinese 2 0 2
## 4 Asian Indian 3 0 3
## 5 9 0 9
## 6 Asian 11 0 11
## 7 Unknown 13 0 13
## 8 Other 6 12 18
## 9 Black 241 9 250
## 10 Hispanic 327 0 327
## 11 White 2431 318 2749
The largest number of deaths is seen in the White population (2,749 in the combined count). However, these raw counts need to be compared against the state's population by race to determine which racial group is proportionally most affected.
Tidy And Analyse the data for Age Background
# Keep the case identifier and age only.
# Age is converted via as.character() first: if read.csv() delivered Age as a
# factor, as.integer() alone would return the internal level codes rather than
# the recorded ages. For an integer or character column the result is the
# plain integer age (non-numeric entries become NA with a warning).
BackgroundTidy <- AllAbuseData %>%
  select(CaseNumber, Age) %>%
  mutate(Age = as.integer(as.character(Age)))
head(BackgroundTidy)
## CaseNumber Age
## 1 Dec-43 22
## 2 Dec-08 28
## 3 36
## 4 15-13536 63
## 5 12-12217 39
## 6 15-6342 46
# Bin ages into four bands and count the cases in each band in one pass.
# cut() uses right-closed intervals by default, so the bands are
# Age <= 20, 20 < Age <= 40, 40 < Age <= 60 and Age > 60.
age_breaks <- c(-Inf, 20, 40, 60, Inf)
age_labels <- c("Below20", "21-40", "41-60", "Above60")
# table() counts every band (including empty ones, as 0) and ignores NA ages.
age_counts <- table(
  cut(BackgroundTidy$Age, breaks = age_breaks, labels = age_labels)
)
# Present the result as a plain data frame: one row per band, in band order.
AllAgeGroups <- data.frame(
  AgeGroup = names(age_counts),
  count = as.integer(age_counts),
  stringsAsFactors = FALSE
)
AllAgeGroups
## AgeGroup count
## 1 Below20 65
## 2 21-40 1338
## 3 41-60 1475
## 4 Above60 165
As the table above shows, there is no major difference in the number of drug-related deaths between people aged 40 or younger (1,403) and those older than 40 (1,640).