Andrew Heiss — Jan 18, 2014, 9:00 PM
library(RCurl)
Loading required package: bitops
library(ggplot2)
# Load data
# Download the 2012 index-crimes-by-agency CSV as a single string.
# followlocation = TRUE tells curl to follow an HTTP redirect rather than
# return the redirect response's (empty) body.
myCsv <- getURL(
  "https://raw.github.com/trobbins/crime-stats-utah/master/data/index-crimes-by-agency/2012.csv",
  followlocation = TRUE
)
# Parse directly from the string: read.csv(text = ) opens and closes its own
# connection, so no textConnection is left open afterwards.
ut.crime <- read.csv(text = myCsv)
# Get rid of commas
# Remove literal "," characters (fixed = TRUE, so no regex) from a column and
# convert what is left to numeric.
strip_commas <- function(x) as.numeric(gsub(",", "", x, fixed = TRUE))

# Apply the same conversion to every count column cleaned here.
ut.crime$population          <- strip_commas(ut.crime$population)
ut.crime$burglary            <- strip_commas(ut.crime$burglary)
ut.crime$larceny             <- strip_commas(ut.crime$larceny)
ut.crime$motor.vehicle.theft <- strip_commas(ut.crime$motor.vehicle.theft)
ut.crime$total.crime.index   <- strip_commas(ut.crime$total.crime.index)
# Order by larceny
# Sort rows from the most to the fewest larcenies (order() places NA last).
ut.crime <- ut.crime[order(ut.crime$larceny, decreasing = TRUE), ]
# Fix the factor levels to the sorted row order so plots keep that ordering.
# unique() keeps factor() from failing on a duplicated level if an agency
# name appears more than once.
ut.crime$agency <- factor(
  ut.crime$agency,
  levels = unique(ut.crime$agency),
  ordered = TRUE
)
# Plot
# Bar chart of the five agencies with the most larcenies (rows are already
# sorted in decreasing order). head() returns only rows that exist, so a table
# with fewer than five rows does not gain all-NA padding rows.
p <- ggplot(head(ut.crime, 5), aes(x = agency, y = larceny, fill = agency))
p +
  geom_bar(stat = "identity") +
  labs(x = NULL, y = NULL, title = "Highest incidents of larceny\n") +
  # The fill legend repeats the x axis, so hide it. guide = "none" replaces
  # the deprecated guide = FALSE.
  scale_fill_brewer(palette = "Set1", guide = "none") +
  theme_bw()
# Model stuff
# Linear regression of larceny counts on burglary, rape, arson and population.
model <- lm(
  formula = larceny ~ burglary + rape + arson + population,
  data = ut.crime
)
# Print the coefficient table and fit statistics.
summary(model)
Call:
lm(formula = larceny ~ burglary + rape + arson + population,
data = ut.crime)
Residuals:
   Min     1Q Median     3Q    Max
-945.1  -78.3  -28.4   29.6 2194.3
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept) 34.99277   33.35157    1.05    0.296
burglary     5.70405    0.42301   13.48   <2e-16 ***
rape         1.71940    4.58759    0.37    0.708
arson        3.47805   19.08126    0.18    0.856
population  -0.00387    0.00185   -2.09    0.038 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 306 on 126 degrees of freedom
(9 observations deleted due to missingness)
Multiple R-squared: 0.946, Adjusted R-squared: 0.944
F-statistic: 548 on 4 and 126 DF, p-value: <2e-16
# Verify correlation
# Correlation between burglary and larceny counts, computed only over rows
# where both values are present. The option is spelled out in full instead of
# relying on partial matching of "complete".
cor(ut.crime$burglary, ut.crime$larceny, use = "complete.obs")
[1] 0.9715
# Scatter plot of larceny against burglary, one point per row, with point
# size mapped to population.
p <- ggplot(ut.crime, aes(x = burglary, y = larceny, size = population))
p +
  geom_point() +
  labs(
    x = "\nBurglaries",
    y = "Larcenies\n",
    title = "Correlation between burglary and larceny\n"
  ) +
  theme_bw()
Warning: Removed 9 rows containing missing values (geom_point).