Basic Setup

Set up libraries

library(stringr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
#library(ggmap)

Read Data and Select Relevant Columns

# Load the raw export of accidental drug-related deaths into a data frame.
AllAbuseData <- read.csv(
  "C:/MSDA/D607/Project2/DrugFatalitiesCT/Accidental_Drug_Related_Deaths__2012-2016.csv"
)
# Preview the first six records to check that the columns were read as expected.
head(AllAbuseData, n = 6L)
##   CaseNumber      Date  Sex  Race Age Residence.City Residence.State
## 1     Dec-43 3/23/2012 Male White  22          ONECO                
## 2     Dec-08 2/21/2012 Male White  28     WINCHESTER                
## 3            5/20/2016 Male White  36           AVON              CT
## 4   15-13536 8/20/2015 Male Black  63      NEW HAVEN              CT
## 5   12-12217 8/29/2012 Male White  39     MIDDLETOWN                
## 6    15-6342 4/11/2015 Male White  46        NEWTOWN              CT
##   Residence.County Death.City Death.State Death.County  Location
## 1          WINDHAM     MOOSUP                  WINDHAM     Other
## 2       LITCHFIELD WINCHESTER               LITCHFIELD          
## 3         HARTFORD       AVON          CT     HARTFORD Residence
## 4        NEW HAVEN  NEW HAVEN          CT    NEW HAVEN Residence
## 5        MIDDLESEX MIDDLETOWN                MIDDLESEX Residence
## 6        FAIRFIELD    NEWTOWN          CT    FAIRFIELD Residence
##   DescriptionofInjury InjuryPlace
## 1                           Other
## 2                           Other
## 3                       Residence
## 4                       Residence
## 5                       Residence
## 6                       Residence
##                                                                 ImmediateCauseA
## 1                                                               Heroin Toxicity
## 2                                                            Oxycodone Toxicity
## 3                                                  Acute Oxycodone Intoxication
## 4        Liver Failure due to cirrhosis due to chronic hepatitis C (OSC Heroin)
## 5                                                               Opiate Toxicity
## 6 Intoxication due to the Combined Effects of Diazepam, Temazepam, and Fentanyl
##   Heroin Cocaine Fentanyl Oxycodone Oxymorphone EtOH Hydro.codeine
## 1      Y                                                          
## 2                                 Y                               
## 3                                 Y                               
## 4      Y                                                          
## 5                                 Y           Y                  Y
## 6                       Y                                         
##   Benzodiazepine Methadone Amphet Tramad Morphine..not.heroin. Other
## 1                                                                   
## 2                                                                   
## 3                                                                   
## 4                                                                   
## 5                                                                   
## 6              Y                                                    
##   Any.Opioid MannerofDeath AmendedMannerofDeath
## 1                 Accident                   NA
## 2                 Accident                   NA
## 3                 Accident                   NA
## 4          Y      Accident                   NA
## 5                 Accident                   NA
## 6          Y      Accident                   NA
##                                  DeathLoc
## 1     Moosup, CT\n(41.712872, -71.881207)
## 2 Winchester, CT\n(41.901973, -73.133851)
## 3       Avon, CT\n(41.809641, -72.830547)
## 4  New Haven, CT\n(41.308252, -72.924161)
## 5 Middletown, CT\n(41.544654, -72.651713)
## 6    Newtown, CT\n(41.413516, -73.308842)
# List every column name available in the raw data.
colnames(AllAbuseData)
##  [1] "CaseNumber"            "Date"                 
##  [3] "Sex"                   "Race"                 
##  [5] "Age"                   "Residence.City"       
##  [7] "Residence.State"       "Residence.County"     
##  [9] "Death.City"            "Death.State"          
## [11] "Death.County"          "Location"             
## [13] "DescriptionofInjury"   "InjuryPlace"          
## [15] "ImmediateCauseA"       "Heroin"               
## [17] "Cocaine"               "Fentanyl"             
## [19] "Oxycodone"             "Oxymorphone"          
## [21] "EtOH"                  "Hydro.codeine"        
## [23] "Benzodiazepine"        "Methadone"            
## [25] "Amphet"                "Tramad"               
## [27] "Morphine..not.heroin." "Other"                
## [29] "Any.Opioid"            "MannerofDeath"        
## [31] "AmendedMannerofDeath"  "DeathLoc"

Tidy And Analyse the data for Drug cause

# Reshape the per-drug indicator columns (Heroin through Other) from wide to
# long form: one row per case and drug, with the indicator value in `Present`.
DeathCauseTidy <- AllAbuseData %>%
  select(CaseNumber, Heroin:Other) %>%
  gather(key = DrugName, value = Present, Heroin:Other)
## Warning: attributes are not identical across measure variables; they will
## be dropped
# Keep only case/drug pairs whose indicator is non-empty, then leave the
# table grouped by drug name.
DeathCauseTidy <- DeathCauseTidy %>%
  filter(Present != "") %>%
  select(CaseNumber, DrugName) %>%
  group_by(DrugName)
# One row per drug with the number of cases in which it was recorded.
DeathCauseDrugCount <- summarise(DeathCauseTidy, count = n())
# Show the drugs from most to least frequently recorded.
arrange(DeathCauseDrugCount, desc(count))
## # A tibble: 13 × 2
##                 DrugName count
##                    <chr> <int>
## 1                 Heroin  1743
## 2                Cocaine   844
## 3               Fentanyl   800
## 4         Benzodiazepine   752
## 5                   EtOH   700
## 6              Oxycodone   455
## 7                  Other   323
## 8              Methadone   288
## 9          Hydro.codeine    91
## 10           Oxymorphone    88
## 11                Amphet    64
## 12                Tramad    62
## 13 Morphine..not.heroin.    35

Heroin is the drug most frequently involved in these deaths, followed by cocaine and fentanyl. A single death can involve more than one drug, so the counts above overlap.

Tidy And Analyse the data for Race Background

# Keep only the case identifier and the demographic fields.
BackgroundTidy <- AllAbuseData %>%
  select(CaseNumber, Sex, Race, Age)
# Preview the first six rows of the demographic subset.
head(BackgroundTidy, n = 6L)
##   CaseNumber  Sex  Race Age
## 1     Dec-43 Male White  22
## 2     Dec-08 Male White  28
## 3            Male White  36
## 4   15-13536 Male Black  63
## 5   12-12217 Male White  39
## 6    15-6342 Male White  46
# Split the comma-separated Race field into a primary and a secondary race.
# Records listing a single race have no second piece; fill = "right" states
# explicitly that SecondaryRace should be NA for them, instead of relying on
# the default behaviour that pads with NA but raises a "Too few values" warning.
RaceTidy <- BackgroundTidy %>%
  separate(
    Race,
    into = c("PrimaryRace", "SecondaryRace"),
    sep = ",",
    fill = "right"
  )
head(RaceTidy)
##   CaseNumber  Sex PrimaryRace SecondaryRace Age
## 1     Dec-43 Male       White          <NA>  22
## 2     Dec-08 Male       White          <NA>  28
## 3            Male       White          <NA>  36
## 4   15-13536 Male       Black          <NA>  63
## 5   12-12217 Male       White          <NA>  39
## 6    15-6342 Male       White          <NA>  46
# Count records per primary race.
# The race label is trimmed BEFORE grouping so that values differing only in
# surrounding whitespace are counted together; trimming after the summary
# would leave them as separate rows that share the same label.
PrimaryRaceCount <- RaceTidy %>%
  mutate(Race = trimws(PrimaryRace)) %>%
  group_by(Race) %>%
  summarise(count = n())
head(PrimaryRaceCount)
## # A tibble: 6 × 2
##           Race count
##          <chr> <int>
## 1                  9
## 2        Asian    11
## 3 Asian Indian     3
## 4        Black   241
## 5      Chinese     2
## 6     Hawaiian     1
# Count records per secondary race. Labels are trimmed BEFORE grouping because
# the pieces produced by splitting on "," may carry surrounding whitespace;
# trimming after the summary could leave duplicate labels, which would then
# duplicate rows in the join below.
SecRaceCount <- RaceTidy %>%
  mutate(Race = trimws(SecondaryRace)) %>%
  group_by(Race) %>%
  summarise(count = n())
# Combine primary and secondary counts per race. A full join keeps races that
# occur only as a secondary race (a left join would silently drop them). The
# NA group (records without a secondary race) is excluded so it can neither
# add a row nor match a missing primary label.
AllRaceCount <- full_join(
  x = PrimaryRaceCount,
  y = filter(SecRaceCount, !is.na(Race)),
  by = "Race"
)
# A race absent on one side contributes zero to that side's count.
# 0L keeps count.x an integer column.
AllRaceCount$count.x[is.na(AllRaceCount$count.x)] <- 0L
AllRaceCount$count.y[is.na(AllRaceCount$count.y)] <- 0
head(AllRaceCount)
## # A tibble: 6 × 3
##           Race count.x count.y
##          <chr>   <int>   <dbl>
## 1                    9       0
## 2        Asian      11       0
## 3 Asian Indian       3       0
## 4        Black     241       9
## 5      Chinese       2       0
## 6     Hawaiian       1       0
# Total mentions of each race: primary count plus secondary count.
AllRaceCount <- mutate(AllRaceCount, total = count.x + count.y)
# Display the races from least to most frequently recorded.
arrange(AllRaceCount, total)
## # A tibble: 11 × 4
##               Race count.x count.y total
##              <chr>   <int>   <dbl> <dbl>
## 1         Hawaiian       1       0     1
## 2  Native American       1       0     1
## 3          Chinese       2       0     2
## 4     Asian Indian       3       0     3
## 5                        9       0     9
## 6            Asian      11       0    11
## 7          Unknown      13       0    13
## 8            Other       6      12    18
## 9            Black     241       9   250
## 10        Hispanic     327       0   327
## 11           White    2431     318  2749

The largest number of deaths is seen in the White population. However, these are raw counts: they need to be compared against the state's population by race to determine which group is proportionally most affected.

Tidy And Analyse the data for Age Background

# Rebuild BackgroundTidy with just the case identifier and the age,
# coercing Age to integer so it can be compared numerically.
BackgroundTidy <- AllAbuseData %>%
  select(CaseNumber, Age) %>%
  mutate(Age = as.integer(Age))
head(BackgroundTidy, n = 6L)
##   CaseNumber Age
## 1     Dec-43  22
## 2     Dec-08  28
## 3             36
## 4   15-13536  63
## 5   12-12217  39
## 6    15-6342  46
# Count cases in four age bands. Each band is summarised to a one-row table
# (count, AgeGroup); rows with a missing Age satisfy none of the conditions.

# Ages up to and including 20.
BackgroundTidyBelow20 <- BackgroundTidy %>%
  filter(Age <= 20) %>%
  summarise(count = n()) %>%
  mutate(AgeGroup = "Below20")

# Ages 21 through 40.
BackgroundTidy2140 <- BackgroundTidy %>%
  filter(Age > 20, Age <= 40) %>%
  summarise(count = n()) %>%
  mutate(AgeGroup = "21-40")

# Ages 41 through 60.
BackgroundTidy4160 <- BackgroundTidy %>%
  filter(Age > 40, Age <= 60) %>%
  summarise(count = n()) %>%
  mutate(AgeGroup = "41-60")

# Ages above 60.
BackgroundTidyAbove60 <- BackgroundTidy %>%
  filter(Age > 60) %>%
  summarise(count = n()) %>%
  mutate(AgeGroup = "Above60")

# Stack the four bands and put the label column first for display.
AllAgeGroups <- bind_rows(
  BackgroundTidyBelow20,
  BackgroundTidy2140,
  BackgroundTidy4160,
  BackgroundTidyAbove60
) %>%
  select(AgeGroup, count)
AllAgeGroups
##   AgeGroup count
## 1  Below20    65
## 2    21-40  1338
## 3    41-60  1475
## 4  Above60   165

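As the table above shows, there is no major difference in the number of deaths between people aged 40 and under (1,403) and those over 40 (1,640); the great majority of deaths fall in the 21-60 age range.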