Import ‘FBI Crime Data 2016’ dataset

# readr supplies read_csv().
library(readr, verbose = FALSE)

# Location of the FBI crime data CSV; the first row holds the column names.
fbi_csv_path <- "J:/SOC712/R12022524_SL050.csv"
FBI <- read_csv(file = fbi_csv_path, col_names = TRUE)
## Parsed with column specification:
## cols(
##   .default = col_character()
## )
## See spec(...) for full column specifications.
# Note: per the spec above, every column is imported as character, so count
# columns have to be converted before any arithmetic.
head(FBI)

There are 2,589 observations and 35 variables.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# List every column of the import so the ones needed later can be picked by name.
colnames(FBI)
##  [1] "FIPS"                                                                  
##  [2] "Name of Area"                                                          
##  [3] "Qualifying Name"                                                       
##  [4] "Nation"                                                                
##  [5] "State"                                                                 
##  [6] "County"                                                                
##  [7] "Total Population (2016 est.)"                                          
##  [8] "Total Violent and Property Crimes"                                     
##  [9] "Total Violent and Property Crimes: Violent Crimes"                     
## [10] "Total Violent and Property Crimes: Property Crimes"                    
## [11] "Total Violent and Property Crimes Rate"                                
## [12] "Total Violent and Property Crimes Rate: Violent Crimes Rate"           
## [13] "Total Violent and Property Crimes Rate: Property Crimes Rate"          
## [14] "Total Violent Crimes"                                                  
## [15] "Total Violent Crimes: Murders and Nonnegligent Manslaughters"          
## [16] "Total Violent Crimes: Rape Revised Definition"                         
## [17] "Total Violent Crimes: Rape Legacy Definition"                          
## [18] "Total Violent Crimes: Robberies"                                       
## [19] "Total Violent Crimes: Aggravated Assaults"                             
## [20] "Total Violent Crimes Rate"                                             
## [21] "Total Violent Crimes Rate: Murders and Nonnegligent Manslaughters Rate"
## [22] "Total Violent Crimes Rate: Rape Revised Definition Rate"               
## [23] "Total Violent Crimes Rate: Rape Legacy Definition Rate"                
## [24] "Total Violent Crimes Rate: Robberies Rate"                             
## [25] "Total Violent Crimes Rate: Aggravated Assaults Rate"                   
## [26] "Total Property Crimes"                                                 
## [27] "Total Property Crimes: Burglaries"                                     
## [28] "Total Property Crimes: Larcenies"                                      
## [29] "Total Property Crimes: Motor Vehicle Thefts"                           
## [30] "Total Property Crimes Rate"                                            
## [31] "Total Property Crimes Rate: Burglaries Rate"                           
## [32] "Total Property Crimes Rate: Larcenies Rate"                            
## [33] "Total Property Crimes Rate: Motor Vehicle Thefts Rate"                 
## [34] "Arsons"                                                                
## [35] "Arsons Rate"
# Rows and columns of the raw import.
dim(FBI)
## [1] 2589   35
# Keep the two identifier columns plus every column from the population
# estimate through the arson rate.
FBI_sel <- FBI %>%
  select(
    `Qualifying Name`,
    State,
    `Total Population (2016 est.)`:`Arsons Rate`
  )
dim(FBI_sel)
## [1] 2589   31
# Display the selection ordered by State code (the sorted result is not stored).
FBI_sel %>% arrange(State)

Creating a separate data frame for each state by filtering on the State code.

# One data frame per state, selected by the two-digit code stored in the
# State column. Each object is named with the state's postal abbreviation.
AL <- filter(FBI_sel, State == "01")
head(AL)
AZ <- filter(FBI_sel, State == "04")
AR <- filter(FBI_sel, State == "05")
CA <- filter(FBI_sel, State == "06")
CO <- filter(FBI_sel, State == "08")
DE <- filter(FBI_sel, State == "10")
FL <- filter(FBI_sel, State == "12")
GA <- filter(FBI_sel, State == "13")
HI <- filter(FBI_sel, State == "15")
ID <- filter(FBI_sel, State == "16")
IL <- filter(FBI_sel, State == "17")
IN <- filter(FBI_sel, State == "18")
IA <- filter(FBI_sel, State == "19")
KS <- filter(FBI_sel, State == "20")
KY <- filter(FBI_sel, State == "21")
LA <- filter(FBI_sel, State == "22")
ME <- filter(FBI_sel, State == "23")
MD <- filter(FBI_sel, State == "24")
MI <- filter(FBI_sel, State == "26")
MN <- filter(FBI_sel, State == "27")
MS <- filter(FBI_sel, State == "28")
# Code 29 is Missouri. It was previously assigned to MS, which silently
# replaced the Mississippi subset created on the line above.
MO <- filter(FBI_sel, State == "29")
MT <- filter(FBI_sel, State == "30")
NE <- filter(FBI_sel, State == "31")
NV <- filter(FBI_sel, State == "32")
NH <- filter(FBI_sel, State == "33")
NJ <- filter(FBI_sel, State == "34")
NM <- filter(FBI_sel, State == "35")
NY <- filter(FBI_sel, State == "36")
NC <- filter(FBI_sel, State == "37")
ND <- filter(FBI_sel, State == "38")
OH <- filter(FBI_sel, State == "39")
OK <- filter(FBI_sel, State == "40")
OR <- filter(FBI_sel, State == "41")
PA <- filter(FBI_sel, State == "42")
SC <- filter(FBI_sel, State == "45")
SD <- filter(FBI_sel, State == "46")
TN <- filter(FBI_sel, State == "47")
TX <- filter(FBI_sel, State == "48")
UT <- filter(FBI_sel, State == "49")
VT <- filter(FBI_sel, State == "50")
VA <- filter(FBI_sel, State == "51")
WA <- filter(FBI_sel, State == "53")
WV <- filter(FBI_sel, State == "54")
WI <- filter(FBI_sel, State == "55")
WY <- filter(FBI_sel, State == "56")

According to the U.S. Census Bureau, the 10 most populous states on July 1, 2015, were, in order: 1. California (CA) 2. Texas (TX) 3. Florida (FL) 4. New York (NY) 5. Illinois (IL) 6. Pennsylvania (PA) 7. Ohio (OH) 8. Georgia (GA) 9. North Carolina (NC) 10. Michigan (MI)

In this assignment, I will compare whether the above order also applies to the crime rate (violent and property crimes considered separately).

Computing, for each of these states, the total population, total violent and property crimes, total violent crimes, and total property crimes

# The four columns needed for the comparison: the population estimate and the
# combined, violent, and property crime counts. Defined once so every state
# subset is guaranteed to use the same columns in the same order.
crime_cols <- c(
  "Total Population (2016 est.)",
  "Total Violent and Property Crimes",
  "Total Violent and Property Crimes: Violent Crimes",
  "Total Violent and Property Crimes: Property Crimes"
)

# Column subsetting with `[` keeps the data-frame class of each state subset
# and returns the columns in the order given by crime_cols.
CA_var <- CA[crime_cols]
TX_var <- TX[crime_cols]
FL_var <- FL[crime_cols]
NY_var <- NY[crime_cols]
IL_var <- IL[crime_cols]
PA_var <- PA[crime_cols]
OH_var <- OH[crime_cols]
GA_var <- GA[crime_cols]
NC_var <- NC[crime_cols]
MI_var <- MI[crime_cols]

class(TX_var)  
## [1] "tbl_df"     "tbl"        "data.frame"
# The selected columns were imported as character, so each state's subset is
# converted to its natural (numeric) type and then summed column-wise to get
# one total per column. as.is = TRUE is given explicitly: it keeps any
# non-numeric column as character instead of a factor and avoids the
# "'as.is' should be specified" warning raised by R >= 4.0.
# na.rm = TRUE skips rows whose values are missing.
CA_val <- type.convert(CA_var, as.is = TRUE)
CA_fin <- colSums(CA_val, na.rm = TRUE)
TX_val <- type.convert(TX_var, as.is = TRUE)
TX_fin <- colSums(TX_val, na.rm = TRUE)
FL_var <- type.convert(FL_var, as.is = TRUE)
FL_fin <- colSums(FL_var, na.rm = TRUE)
NY_var <- type.convert(NY_var, as.is = TRUE)
NY_fin <- colSums(NY_var, na.rm = TRUE)
IL_var <- type.convert(IL_var, as.is = TRUE)
IL_fin <- colSums(IL_var, na.rm = TRUE)
PA_var <- type.convert(PA_var, as.is = TRUE)
PA_fin <- colSums(PA_var, na.rm = TRUE)
OH_var <- type.convert(OH_var, as.is = TRUE)
OH_fin <- colSums(OH_var, na.rm = TRUE)
GA_var <- type.convert(GA_var, as.is = TRUE)
GA_fin <- colSums(GA_var, na.rm = TRUE)
NC_var <- type.convert(NC_var, as.is = TRUE)
NC_fin <- colSums(NC_var, na.rm = TRUE)
MI_var <- type.convert(MI_var, as.is = TRUE)
MI_fin <- colSums(MI_var, na.rm = TRUE)
# One column per state; the rows are the four summed measures.
data <- data.frame(CA = CA_fin, TX = TX_fin, FL = FL_fin, NY = NY_fin, IL = IL_fin, PA = PA_fin, OH = OH_fin, GA = GA_fin, NC = NC_fin, MI = MI_fin)


# Show the totals with one column per state.
data
# Transpose so that each state becomes a row and each measure a column.
# t() on a data frame returns a matrix. Note that the object reuses the name
# of the base function table().
table <- t(data)

table
##    Total Population (2016 est.) Total Violent and Property Crimes
## CA                     38379130                            129923
## TX                     27635924                            138052
## FL                     19660770                            248222
## NY                      9943131                             47097
## IL                     12286935                             18455
## PA                      7188286                                44
## OH                      8522964                             40570
## GA                      8066326                            136817
## NC                      6696357                             48468
## MI                      9607636                             33971
##    Total Violent and Property Crimes: Violent Crimes
## CA                                             26286
## TX                                             19850
## FL                                             37499
## NY                                              4179
## IL                                              2328
## PA                                                37
## OH                                              2832
## GA                                             15650
## NC                                              4565
## MI                                              5643
##    Total Violent and Property Crimes: Property Crimes
## CA                                             103637
## TX                                             118202
## FL                                             210723
## NY                                              42918
## IL                                              16127
## PA                                                  7
## OH                                              37738
## GA                                             121167
## NC                                              43903
## MI                                              28328

Renaming and Calculating

# Preview the transposed matrix as a tibble (printed only, not stored).
as_tibble(table)
# Work with a data frame from here on so the dplyr verbs apply.
table <- as.data.frame(table)
sapply(table, class)
##                       Total Population (2016 est.) 
##                                          "numeric" 
##                  Total Violent and Property Crimes 
##                                          "numeric" 
##  Total Violent and Property Crimes: Violent Crimes 
##                                          "numeric" 
## Total Violent and Property Crimes: Property Crimes 
##                                          "numeric"
# The row names carry the state abbreviations. Capture them now, as a plain
# character vector, before any dplyr verb has a chance to drop row names.
# Taking them from the data keeps the labels aligned with the rows, and a
# character vector (rather than a list) gives an ordinary character column.
State <- rownames(table)

# Replace the long source column names with short syntactic ones.
table <- rename(table, Total_Population = "Total Population (2016 est.)", Total_Crimes = "Total Violent and Property Crimes", Total_Violent_Crimes = "Total Violent and Property Crimes: Violent Crimes", Total_Property_Crimes = "Total Violent and Property Crimes: Property Crimes")

# Crime rates expressed per 100,000 residents.
table1 <- mutate(table, Violent_Crime_Rate = Total_Violent_Crimes / Total_Population * 100000, Property_Crime_Rate = Total_Property_Crimes / Total_Population * 100000)


table1$State <- State
# Move State to the front; all other columns keep their existing order.
table1 <- select(table1, State, everything())

table1

Visualization

library(ggplot2)
# Histogram of the violent crime rate across the rows of table1.
ggplot(table1, aes(x = Violent_Crime_Rate)) +
  geom_histogram(colour = "red", bins = 10) +
  labs(title = "Violent Crime Rates of 10 States")

# Histogram of the property crime rate across the rows of table1.
ggplot(table1, aes(x = Property_Crime_Rate)) +
  geom_histogram(colour = "blue", bins = 10) +
  labs(title = "Property Crime Rates of 10 States")

References

United States Census Bureau. (2015). North Carolina Becomes Ninth State With 10 Million or More People, Census Bureau Reports. Retrieved from https://www.census.gov/newsroom/press-releases/2015/cb15-215.html