# Author: Tarek Dib
# Date: September 27, 2015
# Read the crime records from the CSV file into the SF_Crimes data frame
SF_Crimes <- read.csv(file = "data/SFCrimes.csv")
# Load lubridate, dplyr and ggplot2 packages
library(lubridate)
library(dplyr)
library(ggplot2)
# Parse the month-day-year values stored in Date into dates with lubridate's mdy()
SF_Crimes[["Date"]] <- mdy(SF_Crimes[["Date"]])
# Derive the calendar year of every record and store it as the factor column Year
SF_Crimes[["Year"]] <- as.factor(year(SF_Crimes[["Date"]]))
# Copy of SF_Crimes in which Category is a factor whose levels are ordered from
# the most frequent category to the least frequent one.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
totCat <- within(
  SF_Crimes,
  Category <- factor(
    Category,
    levels = names(sort(table(Category), decreasing = TRUE))
  )
)
# Count the number of records in each Category. count() is used instead of the
# deprecated count_(); the tally comes back in the second column.
crime_category_count <- data.frame(count(SF_Crimes, Category))
# Rename the tally column of the crime_category_count data frame
names(crime_category_count)[2] <- "Count"
# Reorder the categories from the highest count to the lowest
crime_category_count <- crime_category_count[
  order(crime_category_count$Count, decreasing = TRUE),
]
# Check the first few rows of the crime_category_count data frame
# head(crime_category_count)
# Keep only the categories that occurred 5000 or more times in the data set
crime_category_count_high <- crime_category_count[
  which(crime_category_count$Count >= 5000), , drop = FALSE
]
# Number of Crimes by Police Department District #
# Cross-tabulation of the records: one row per district, one column per year
tab_District <- table(SF_Crimes$PdDistrict, SF_Crimes$Year)
# Copy of SF_Crimes in which PdDistrict is a factor whose levels are ordered
# from the district with the most records to the one with the fewest.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
tot_District <- within(
  SF_Crimes,
  PdDistrict <- factor(
    PdDistrict,
    levels = names(sort(table(PdDistrict), decreasing = TRUE))
  )
)
# Make sure Year is a factor in the copy as well
tot_District$Year <- as.factor(tot_District$Year)
# Monthly total number of Crimes #
# Number of records on each calendar date (count() replaces the deprecated
# count_())
crime_date <- data.frame(count(SF_Crimes, Date))
names(crime_date)[2] <- "Count"
# Tag every date with its year, its month and a "month-year" label, e.g. "8-2013"
crime_date$Year <- year(crime_date$Date)
crime_date$Month <- month(crime_date$Date)
crime_date$myear <- paste0(crime_date$Month, "-", crime_date$Year)
# Monthly count of crimes: add up the daily counts within each month.
# crime_date holds one row per date, so counting its rows per month would give
# the number of dates in the month rather than the number of crimes; weighting
# the tally by the daily Count gives the intended monthly total.
crime_monthly <- data.frame(count(crime_date, myear, wt = Count))
names(crime_monthly)[2] <- "Count"
# Drop July 2013 and September 2015 because only part of each month is available
crime_monthly_sub <- subset(
  crime_monthly,
  !(myear %in% c("7-2013", "9-2015"))
)
# Turn the month-year label into a factor with an explicit level order.
# NOTE(review): the levels are grouped by calendar month (starting with August)
# rather than listed strictly chronologically -- confirm this is the intended
# ordering.
crime_monthly_sub[["myear"]] <- factor(
  crime_monthly_sub[["myear"]],
  levels = c(
    "8-2013", "8-2014", "8-2015",
    "9-2013", "9-2014",
    "10-2013", "10-2014",
    "11-2013", "11-2014",
    "12-2013", "12-2014",
    "1-2014", "1-2015",
    "2-2014", "2-2015",
    "3-2014", "3-2015",
    "4-2014", "4-2015",
    "5-2014", "5-2015",
    "6-2014", "6-2015",
    "7-2014", "7-2015"
  )
)
# Explore Larceny/Theft Crime category in Detail ###################
# Keep only the records whose Category is exactly "LARCENY/THEFT"
lar_theft_cat <- subset(SF_Crimes, Category == "LARCENY/THEFT")
# Number of thefts recorded for each distinct value of Time (count() replaces
# the deprecated count_())
theft_count <- data.frame(count(lar_theft_cat, Time))
names(theft_count)[2] <- "Count"
# Keep only the times at which 100 or more thefts were recorded
theft_count_sub <- subset(theft_count, Count >= 100)
# Daily number of thefts
theft_daily <- data.frame(count(lar_theft_cat, Date))
names(theft_daily)[2] <- "Count"
# Average number of thefts per day, taken over the dates that have at least one
# theft record (dates without thefts do not appear in theft_daily)
theft_avg <- mean(theft_daily$Count)
########### Total Crimes #############
# Number of records on each calendar date (count() replaces the deprecated
# count_()); the tally column is renamed to Count
total_crimes <- data.frame(count(SF_Crimes, Date))
names(total_crimes)[2] <- "Count"