ut-crime-tinkering.R

Andrew Heiss — Jan 18, 2014, 9:00 PM

library(RCurl)
Loading required package: bitops
library(ggplot2)

# Load data
# Download the 2012 index-crimes-by-agency CSV as a single string.
myCsv <- getURL("https://raw.github.com/trobbins/crime-stats-utah/master/data/index-crimes-by-agency/2012.csv")
# Parse through `text =` so read.csv() creates and closes its own connection;
# an already-open textConnection() passed as the file is never closed by
# read.csv() and lingers until garbage collection warns about it.
ut.crime <- read.csv(text = myCsv)

# Get rid of commas
# These count columns contain thousands separators (e.g. "1,234"); drop the
# commas and convert each column to numeric in one pass.
comma_cols <- c(
  "population",
  "burglary",
  "larceny",
  "motor.vehicle.theft",
  "total.crime.index"
)
ut.crime[comma_cols] <- lapply(
  ut.crime[comma_cols],
  function(values) as.numeric(gsub(",", "", values, fixed = TRUE))
)

# Order by larceny
# Sort rows from most to fewest larcenies, then freeze that row order into the
# agency factor levels so plots keep agencies in sorted order.
larceny_rank <- order(ut.crime$larceny, decreasing = TRUE)
ut.crime <- ut.crime[larceny_rank, ]
ut.crime$agency <- ordered(ut.crime$agency, levels = ut.crime$agency)

# Plot
# Bar chart of the first five rows of ut.crime (the data is sorted by larceny,
# descending, before this point). head() is used instead of ut.crime[1:5, ]
# so that a data set with fewer than five rows is not padded with NA rows.
p <- ggplot(head(ut.crime, 5), aes(x = agency, y = larceny, fill = agency))
# The fill legend would only repeat the x-axis labels, so it is hidden.
# guide = "none" replaces guide = FALSE, which is deprecated in
# ggplot2 >= 3.3.4.
p + geom_bar(stat = "identity") +
  labs(x = NULL, y = NULL, title = "Highest incidents of larceny\n") +
  scale_fill_brewer(palette = "Set1", guide = "none") +
  theme_bw()

plot of chunk unnamed-chunk-1



# Model stuff
# Ordinary least squares: larceny counts regressed on burglary, rape, and
# arson counts plus population. Rows with missing values are dropped by lm().
model <- lm(
  formula = larceny ~ burglary + rape + arson + population,
  data = ut.crime
)
summary(model)

Call:
lm(formula = larceny ~ burglary + rape + arson + population, 
    data = ut.crime)

Residuals:
   Min     1Q Median     3Q    Max 
-945.1  -78.3  -28.4   29.6 2194.3 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 34.99277   33.35157    1.05    0.296    
burglary     5.70405    0.42301   13.48   <2e-16 ***
rape         1.71940    4.58759    0.37    0.708    
arson        3.47805   19.08126    0.18    0.856    
population  -0.00387    0.00185   -2.09    0.038 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 306 on 126 degrees of freedom
  (9 observations deleted due to missingness)
Multiple R-squared:  0.946, Adjusted R-squared:  0.944 
F-statistic:  548 on 4 and 126 DF,  p-value: <2e-16

# Verify correlation
# Pearson correlation between burglary and larceny counts, computed only on
# rows where both values are present.
with(ut.crime, cor(burglary, larceny, use = "complete.obs"))
[1] 0.9715
# Scatter plot of larceny against burglary, with point size mapped to
# population.
p <- ggplot(
  ut.crime,
  aes(x = burglary, y = larceny, size = population)
)
p +
  geom_point() +
  theme_bw() +
  labs(
    x = "\nBurglaries",
    y = "Larcenies\n",
    title = "Correlation between burglary and larceny\n"
  )
Warning: Removed 9 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-1