BEFORE TRANSFORMATION
## Load the cleaned crime data, then pull out the ten crime-rate columns
crimeClean <- read.csv("crimeClean.csv", header = TRUE, row.names = 1)
# colnames(crimeClean)

# Column positions (within crimeClean) of the rate variables, listed in the
# same order as the display names assigned just below.
subs <- c(106, 108, 110, 112, 121, 114, 116, 118, 120, 122)
crimeRates <- crimeClean[, subs]
names(crimeRates) <- c(
  "Murder", "Rape", "Robbery", "Assault", "All Violent",
  "Burglary", "Larceny", "AutoTheft", "Arson", "Non Violent"
)

# The first five columns (Murder through All Violent) form the violent subset.
violenceRates <- crimeRates[1:5]
## Look at the violent crime variables one at a time
# Helper scripts; classEda() is called below and is presumably defined in
# classEda.r -- confirm.
source("classDensity.r")
source("classEda.r")
library(car)

# Labels for the displays: the column names, with the aggregate column
# (position 5) spelled out in full.
lab1 <- colnames(violenceRates)
lab1[5] <- "All Violent Crimes"

# One classEda() call per violent-crime rate, on the original scale.
for (i in seq_along(lab1)) {
  classEda(violenceRates[, i], lab1 = lab1[i], units = "Rate per 100,000")
}
AFTER TRANSFORMATION
# Same per-variable displays, but with each rate raised to the 0.02 power
# before it is handed to classEda().
for (i in seq_along(lab1)) {
  classEda(
    violenceRates[, i]^0.02,
    lab1 = lab1[i],
    lab2 = "Raised to the .02 Power",
    units = "Rate per 100,000 **.02"
  )
}
CORRELATION PLOT
# Pairwise correlations among the violent-crime rates, shown to two decimals.
round(cor(violenceRates), digits = 2)
## Murder Rape Robbery Assault All Violent
## Murder 1.00 0.43 0.67 0.56 0.68
## Rape 0.43 1.00 0.45 0.51 0.58
## Robbery 0.67 0.45 1.00 0.62 0.84
## Assault 0.56 0.51 0.62 1.00 0.95
## All Violent 0.68 0.58 0.84 0.95 1.00

# Correlations among all ten rates, turned into dissimilarities 1 - |r| and
# projected onto a single classical-MDS coordinate.
corMat <- cor(crimeRates)
distMat <- 1 - abs(corMat)
xloc <- cmdscale(distMat, k = 1)

# Variable order along that coordinate; print the sorted coordinates.
ord <- order(xloc[, 1])
xloc[ord, 1]
## Larceny Rape Non Violent Burglary Assault Arson
## -0.35584 -0.20322 -0.17547 -0.08668 0.03242 0.05243
## All Violent Robbery Murder AutoTheft
## 0.08622 0.20170 0.21523 0.23321

# Correlation matrix with rows and columns rearranged into the MDS order.
corMatOrd <- corMat[ord, ord]
# panel.corrgram and panel.corrgram.2 (used below) are presumably defined in
# panelCorrgram.r -- confirm.
source("panelCorrgram.r")
library(lattice)
library(grid)
library(ellipse)
##
## Attaching package: 'ellipse'
##
## The following object is masked from 'package:car':
##
## ellipse

# Corrgram of the reordered correlation matrix: 20 colour intervals spanning
# just beyond [-1, 1], colour key on top, x-axis labels rotated 90 degrees.
levelplot(
  corMatOrd,
  xlab = NULL,
  ylab = NULL,
  at = do.breaks(c(-1.01, 1.01), 20),
  panel = panel.corrgram,
  label = TRUE,
  scales = list(x = list(rot = 90)),
  colorkey = list(space = "top")
)

# Second corrgram variant: 100 colour intervals on a red-white-blue ramp.
levelplot(
  corMatOrd,
  xlab = NULL,
  ylab = NULL,
  at = do.breaks(c(-1.01, 1.01), 100),
  panel = panel.corrgram.2,
  scales = list(x = list(rot = 90)),
  colorkey = list(space = "top"),
  col.regions = colorRampPalette(c("red", "white", "blue"))
)
Scatterplot matrices and hexagon binning
## Run
# Base-graphics scatterplot matrix of the violent-crime rates: no gap between
# panels, horizontal axis labels, first row drawn at the bottom.
pairs(
  violenceRates,
  main = "Scatterplot Matrix",
  gap = 0,
  las = 1,
  row1attop = FALSE
)

library(lattice)
library(hexbin)

# Lattice scatterplot matrix drawn with hexagon-binned panels. The trans
# function (x^0.5) is passed along with the other extra arguments;
# NOTE(review): presumably it rescales the bin counts -- confirm against the
# hexbin documentation.
splom(
  violenceRates,
  panel = panel.hexbinplot,
  trans = function(x) x^0.5,
  pscale = 0,
  cex.labels = 0.2
)
Scatterplot matrix with smooths
# Scatterplot matrix whose panels request points ("p"), a reference grid ("g")
# and a smooth ("smooth"); the smooth line is drawn in red at double width.
splom(
  violenceRates,
  type = c("p", "g", "smooth"),
  col.line = "red",
  lwd = 2,
  pscale = 0,
  main = "Rates per 100000"
)
Regression diagnostics after transformation
# Regress the 0.02 power of ViolentCrimesPerPop on every other column of
# crimeRegLog (the `.` on the right-hand side of the formula).
# NOTE(review): crimeRegLog is not created in the visible part of this
# script -- confirm where it is built.
lm2 <- lm(ViolentCrimesPerPop^0.02 ~ ., data = crimeRegLog)
# Diagnostic plots for the fitted model.
plot(lm2)
Stepwise regression variable removal
# NOTE(review): lm3 is not assigned anywhere in the visible part of this
# script; presumably it is the model left after stepwise term removal
# starting from lm2 -- confirm and restore the missing step if so.
plot(lm3)
Looking at their univariate distributions
# Labels of the predictor terms recorded in lm3's terms object.
tmp <- lm3$terms
termLab <- attr(tmp, which = "term.labels")

# One classEda() call per retained term.
# seq_along() is used instead of 1:length(termLab): if lm3 kept no predictor
# terms, termLab is empty and 1:0 would run the body for i = 1 and i = 0,
# indexing crimeRegLog with NA / an empty name. With seq_along() the loop is
# simply skipped in that case.
for (i in seq_along(termLab)) {
  # Ask for confirmation before each new page so the displays can be read one
  # at a time. Left inside the loop on purpose: classEda() is defined
  # elsewhere and may touch graphics state between calls.
  par(ask = TRUE)
  classEda(crimeRegLog[, termLab[i]], lab1 = termLab[i])
}