Content

The Murder Accountability Project is the most complete database of homicides in the United States currently available. This dataset includes murders from the FBI’s Supplementary Homicide Report from 1976 to the present and Freedom of Information Act data on more than 22,000 homicides that were not reported to the Justice Department. This dataset includes the age, race, sex, and ethnicity of victims and perpetrators, in addition to the relationship between the victim and perpetrator and the weapon used.

Error: attempt to use zero-length variable name

To start, let us get a quick look at the data we are dealing with, using the glimpse() function from the dplyr package.

# Print a compact overview of every column: name, type and first few values.
homicides %>%
  glimpse()
Observations: 638,454
Variables: 24
$ RecordID             <chr> "000001", "000002", "000003", "000004", "000005", "000006", "000007", "000008", "000009", "00...
$ AgencyCode           <chr> "AK00101", "AK00101", "AK00101", "AK00101", "AK00101", "AK00101", "AK00101", "AK00101", "AK00...
$ AgencyName           <chr> "Anchorage", "Anchorage", "Anchorage", "Anchorage", "Anchorage", "Anchorage", "Anchorage", "A...
$ AgencyType           <chr> "Municipal Police", "Municipal Police", "Municipal Police", "Municipal Police", "Municipal Po...
$ City                 <chr> "Anchorage", "Anchorage", "Anchorage", "Anchorage", "Anchorage", "Anchorage", "Anchorage", "A...
$ State                <chr> "Alaska", "Alaska", "Alaska", "Alaska", "Alaska", "Alaska", "Alaska", "Alaska", "Alaska", "Al...
$ Year                 <int> 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 198...
$ Month                <chr> "January", "March", "March", "April", "April", "May", "May", "June", "June", "June", "July", ...
$ Incident             <int> 1, 1, 2, 1, 2, 1, 2, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 1, 1, ...
$ CrimeType            <chr> "Murder or Manslaughter", "Murder or Manslaughter", "Murder or Manslaughter", "Murder or Mans...
$ CrimeSolved          <chr> "Yes", "Yes", "No", "Yes", "No", "Yes", "Yes", "Yes", "No", "Yes", "No", "Yes", "Yes", "Yes",...
$ VictimSex            <chr> "Male", "Male", "Female", "Male", "Female", "Male", "Female", "Female", "Male", "Male", "Male...
$ VictimAge            <int> 14, 43, 30, 43, 30, 30, 42, 99, 32, 38, 36, 20, 36, 20, 48, 31, 16, 33, 27, 33, 31, 21, 60, 4...
$ VictimRace           <chr> "Native American/Alaska Native", "White", "Native American/Alaska Native", "White", "Native A...
$ VictimEthnicity      <chr> "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unkn...
$ PerpetratorSex       <chr> "Male", "Male", "Unknown", "Male", "Unknown", "Male", "Male", "Male", "Unknown", "Male", "Unk...
$ PerpetratorAge       <int> 15, 42, 0, 42, 0, 36, 27, 35, 0, 40, 0, 49, 39, 49, 0, 29, 19, 23, 33, 35, 29, 29, 26, 41, 29...
$ PerpetratorRace      <chr> "Native American/Alaska Native", "White", "Unknown", "White", "Unknown", "White", "Black", "W...
$ PerpetratorEthnicity <chr> "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unkn...
$ Relationship         <chr> "Acquaintance", "Acquaintance", "Unknown", "Acquaintance", "Unknown", "Acquaintance", "Wife",...
$ Weapon               <chr> "Blunt Object", "Strangulation", "Unknown", "Strangulation", "Unknown", "Rifle", "Knife", "Kn...
$ VictimCount          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, ...
$ PerpetratorCount     <int> 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ RecordSource         <chr> "FBI", "FBI", "FBI", "FBI", "FBI", "FBI", "FBI", "FBI", "FBI", "FBI", "FBI", "FBI", "FBI", "F...

Now that we know what kind of data the homicides data set contains, let us find out how many homicides take place every year.

# Tally the homicide records per year and list the years chronologically.
homicides %>%
  count(Year) %>%
  # count() names its tally column `n`; give it a descriptive name.
  rename(CountOfHomicides = n) %>%
  arrange(Year)

The five states with the highest number of homicides can be found as follows.

# Rank the states by number of homicide records and keep the five
# highest-ranked ones.
homicides %>%
  count(State) %>%
  # Keep the state, its tally under a readable name, and its rank
  # (rank 1 = most records); the raw `n` column is dropped.
  transmute(
    State,
    `No of Deaths` = n,
    Rank = rank(desc(n))
  ) %>%
  arrange(Rank) %>%
  # top_n() keeps the largest `wt` values, so negating Rank via desc()
  # selects the smallest ranks, i.e. the states with the most records.
  top_n(n = 5, wt = desc(Rank))

The state of California has the highest number of homicide deaths.

To plot the same details as a graph, I would use the syntax below.

Let's check how many crimes have been solved or not solved in each year.

# Bar chart of the number of records per year, with one bar per
# CrimeSolved value placed side by side.
ggplot(
  data = homicides,
  mapping = aes(x = Year, fill = CrimeSolved)
) +
  geom_bar(position = "dodge")

# Count the unsolved homicides per year, keep 10 of those years and show
# them from the largest count to the smallest.
# NOTE(review): because `wt = desc(n)`, top_n() keeps the 10 years with the
# FEWEST unsolved homicides. If the years with the most unsolved cases were
# intended, this should be `wt = n` -- confirm with the author.
homicides %>%
  filter(CrimeSolved == "No") %>%
  count(Year) %>%
  top_n(n = 10, wt = desc(n)) %>%
  arrange(desc(n))
# Cross-tabulate the records: one row per year, one column per victim race,
# each cell holding the number of records for that combination.
homicides %>%
  count(Year, VictimRace) %>%
  spread(key = VictimRace, value = n)
# Cross-tabulate perpetrator race (rows) against victim race (columns),
# leaving out the "Unknown" category on both axes.
homicides %>%
  count(VictimRace, PerpetratorRace) %>%
  spread(key = VictimRace, value = n) %>%
  # Drop the row for perpetrators of unknown race ...
  filter(PerpetratorRace != "Unknown") %>%
  # ... and the column for victims of unknown race.
  select(-Unknown)
# The 10 most frequent victim/perpetrator relationships, ignoring records
# whose relationship is "Unknown".
homicides %>%
  # sort = TRUE orders the tally from the largest count to the smallest.
  count(Relationship, sort = TRUE) %>%
  filter(Relationship != "Unknown") %>%
  top_n(n = 10, wt = n)
# Jittered scatter of victim age for each year (year treated as a category),
# coloured by victim race; alpha = 0.5 makes overlapping points visible.
# NOTE(review): `homicides_clean` is not created in the code shown here --
# presumably a cleaned copy of `homicides` from the session; confirm it
# exists before running.
ggplot(
  data = homicides_clean,
  mapping = aes(x = as.factor(Year), y = VictimAge, color = VictimRace)
) +
  geom_jitter(alpha = 0.5)
LS0tDQp0aXRsZTogIkhvbWljaWRlcyBEYXRhIGZyb20gS2FnZ2xlIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KQ29udGVudA0KDQpUaGUgTXVyZGVyIEFjY291bnRhYmlsaXR5IFByb2plY3QgaXMgdGhlIG1vc3QgY29tcGxldGUgZGF0YWJhc2Ugb2YgaG9taWNpZGVzIGluIHRoZSBVbml0ZWQgU3RhdGVzIGN1cnJlbnRseSBhdmFpbGFibGUuIFRoaXMgZGF0YXNldCBpbmNsdWRlcyBtdXJkZXJzIGZyb20gdGhlIEZCSSdzIFN1cHBsZW1lbnRhcnkgSG9taWNpZGUgUmVwb3J0IGZyb20gMTk3NiB0byB0aGUgcHJlc2VudCBhbmQgRnJlZWRvbSBvZiBJbmZvcm1hdGlvbiBBY3QgZGF0YSBvbiBtb3JlIHRoYW4gMjIsMDAwIGhvbWljaWRlcyB0aGF0IHdlcmUgbm90IHJlcG9ydGVkIHRvIHRoZSBKdXN0aWNlIERlcGFydG1lbnQuIFRoaXMgZGF0YXNldCBpbmNsdWRlcyB0aGUgYWdlLCByYWNlLCBzZXgsIGV0aG5pY2l0eSBvZiB2aWN0aW1zIGFuZCBwZXJwZXRyYXRvcnMsIGluIGFkZGl0aW9uIHRvIHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiB0aGUgdmljdGltIGFuZCBwZXJwZXRyYXRvciBhbmQgd2VhcG9uIHVzZWQuDQoNCmBgYHtyIGVjaG89RkFMU0V9DQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShnZ3ZpcykNCmxpYnJhcnkodGlkeXIpDQpgYGANCg0KRmlyc3QgdG8gc3RhcnQgd2l0aCBhIGdsaW1wc2Ugb2Ygd2hhdCBkYXRhIHdlIGFyZSBkZWFsaW5nIHdpdGggSSBhbSB1c2luZyBnbGltcHNlKCkgZnVuY3Rpb24gZnJvbSBkcGx5ciBwYWNrYWdlLg0KDQpgYGB7cn0NCmdsaW1wc2UoaG9taWNpZGVzKQ0KYGBgDQoNCk5vdyB0aGF0IHdlIGtub3cgd2hhdCBraW5kIG9mIGRhdGEgd2UgaGF2ZSBpbiBob21pY2lkZXMgZGF0YSBzZXQsIGxldCB1cyB0cnkgdG8ga25vdyBob3cgbWFueSBob21pY2lkZXMgYXJlIHRha2luZyBwbGFjZQ0KZXZlcnkgeWVhci4NCg0KYGBge3J9DQpob21pY2lkZXMgJT4lIA0KICBjb3VudChZZWFyKSAlPiUNCiAgc2VsZWN0KCBZZWFyLCBDb3VudE9mSG9taWNpZGVzID0gbikgJT4lIA0KICBhcnJhbmdlKFllYXIpDQpgYGANCg0KVGhlIHRvcCA1IHN0YXRlcyB3aGVyZSBIb21pY2lkZSByYXRlcyBhcmUgdmVyeSBoaWdoIGNhbiBiZSBjYWxjdWxhdGVkIGFzIGZvbGxvd3MuDQpgYGB7cn0NCmhvbWljaWRlcyAlPiUgDQogIGNvdW50KFN0YXRlKSAlPiUgDQogIG11dGF0ZShgTm8gb2YgRGVhdGhzYCA9IG4sIFJhbmsgPSByYW5rKGRlc2MobikpKSAlPiUgDQogIHNlbGVjdCgtbikgJT4lDQogIGFycmFuZ2UoUmFuaykgJT4lIA0KICB0b3BfbihuID0gNSx3dCA9IGRlc2MoUmFuaykpDQpgYGANCg0KU3RhdGUgb2YgQ2FsaWZvcm5pYSBoYXMgdGhlIGhpZ2hlc3QgbnVtYmVyIG9mIEhvbWljaWRhbCBEZWF0aHMuDQoNCklmIGEgZ3JhcGggaGFzIHRvIGJlIHBsb3R0ZWQgZm9yIHRoZSBzYW1lIGRldGFpbHMsIHRoZW4gSSB3b3VsZCB1c2UgdGhlIGJlbG93IHN5bnRheC4NCg0KYGBg
e3IgZWNobz1GQUxTRX0NCmhvbWljaWRlcyAlPiUgDQogIGNvdW50KFN0YXRlKSAlPiUNCiAgbXV0YXRlKFJhbmsgPSByYW5rKGRlc2MobikpKSAlPiUNCiAgYXJyYW5nZShkZXNjKG4pKSAlPiUNCiAgdG9wX24obj0xMCx3dD1kZXNjKFJhbmspKSAlPiUNCiAgZ2d2aXMoflN0YXRlLH5uLGZpbGxPcGFjaXR5IDo9IDAuNSkgJT4lDQogIGxheWVyX2JhcnMoc3RhY2sgPSBGQUxTRSkgJT4lDQogIGFkZF9heGlzKHR5cGUgPSAieSIsdGl0bGU9Ik5vIG9mIERlYXRocyIsdGl0bGVfb2Zmc2V0ID0gNTAsdGlja3MgPSA1LHN1YmRpdmlkZSA9IDMsdGlja19zaXplX21ham9yID0gMTAsdGlja19zaXplX21pbm9yID0gNSkNCmBgYA0KDQpMZXRzIGNoZWNrIGhvdyBtYW55IGNyaW1lcyBoYXMgYmVlbiBzb2x2ZWQvbm90IHNvbHZlZCBmb3IgZXZlcnkgeWVhci4NCg0KYGBge3J9DQpnZ3Bsb3QoaG9taWNpZGVzLGFlcyh4PVllYXIsZmlsbD1DcmltZVNvbHZlZCkpK2dlb21fYmFyKHBvc2l0aW9uID0gImRvZGdlIikNCmBgYA0KDQpgYGB7cn0NCmhvbWljaWRlcyAlPiUgZmlsdGVyKENyaW1lU29sdmVkID09ICJObyIpICU+JSBjb3VudChZZWFyKSAlPiUgdG9wX24obj0xMCx3dD1kZXNjKG4pKSAlPiUgYXJyYW5nZShkZXNjKG4pKQ0KYGBgDQoNCmBgYHtyfQ0KaG9taWNpZGVzICU+JSBjb3VudChZZWFyLFZpY3RpbVJhY2UpICU+JSBzcHJlYWQoVmljdGltUmFjZSxuKQ0KYGBgDQoNCmBgYHtyfQ0KaG9taWNpZGVzICU+JSANCiAgY291bnQoVmljdGltUmFjZSxQZXJwZXRyYXRvclJhY2UpICU+JSANCiAgc3ByZWFkKFZpY3RpbVJhY2UsbikgJT4lIA0KICBmaWx0ZXIoUGVycGV0cmF0b3JSYWNlICE9ICJVbmtub3duIikgJT4lIA0KICBzZWxlY3QoLVVua25vd24pDQpgYGANCg0KDQpgYGB7cn0NCmhvbWljaWRlcyAlPiUNCiAgY291bnQoUmVsYXRpb25zaGlwKSAlPiUNCiAgZmlsdGVyKFJlbGF0aW9uc2hpcCAhPSAiVW5rbm93biIpICU+JQ0KICBhcnJhbmdlKGRlc2MobikpICU+JQ0KICB0b3BfbihuID0gMTAsd3Q9bikNCmBgYA0KDQpgYGB7cn0NCmdncGxvdChob21pY2lkZXNfY2xlYW4sYWVzKHg9YXMuZmFjdG9yKFllYXIpLHk9VmljdGltQWdlLGNvbG9yPVZpY3RpbVJhY2UpKStnZW9tX2ppdHRlcihhbHBoYT0wLjUpDQpgYGANCg0K