A quick Exploratory look into the ‘Towed Vehicles’ Data set from Feb. 2nd 2015 - Present due to snow conditions in Hartford, CT
1. Get the Data
library(RSocrata)
library(ggplot2)
library(plyr)
library(ggmap)
# read the data from the url provided
# snowData <- read.socrata("https://data.hartford.gov/Public-Safety/Towed-Vehicles-From-02012015/3acs-ahvq")
snowData <- read.csv("Towed_Vehicles_From_02012015.csv", header=T, sep=",")
h.snowData <- head(snowData)
h.snowData
## TowNum Tow.Address Tow.Firm
## 1 304069 50 WILLARD ST CENTRAL GARAGE
## 2 304070 142 CLARK ST A & N AUTO
## 3 304078 80 POPE PARK HWY CAPITOL AUTOMOTIVE LLC
## 4 304079 80 POPE PARK HWY CENTRAL AUTO & TRANSPORT, LLC
## 5 304083 39 CHARTER OAK PL CENTRAL AUTO & TRANSPORT, LLC
## 6 304084 20 CHARTER OAK PL CENTRAL AUTO & TRANSPORT, LLC
## Firm.Address Firm.Phone Vehicle.State
## 1 36 BOND ST. HARTFORD,CT 06114 860-296-1426 CT
## 2 1516,PARK ST,HARTFORD,CT,06106 860-233-7079 CT
## 3 971 CAPITOL AV. HARTFORD,CT 06106 860-236-2602 CT
## 4 195 MAXIM RD. HARTFORD, CT 06114 860-246-7616
## 5 195 MAXIM RD. HARTFORD, CT 06114 860-246-7616 CT
## 6 195 MAXIM RD. HARTFORD, CT 06114 860-246-7616 CT
## Vehicle.Plate Year Make Model Color Date Time
## 1 512YPD 2005 ACURA RL GRAY 2012015 333
## 2 9ARHP4 1996 MERCURY MARQUIS TAN 2012015 342
## 3 9ALSE5 2006 HONDA ODYSSEY BLUE 2012015 835
## 4 2010 CHEVROLET CAMARO BLACK 2012015 835
## 5 2AFMB5 2004 KIA OPTIMA RED 2012015 15
## 6 820RWH 2002 NISSAN MAXIMA TAN 2012015 16
sum.snowData <- summary(snowData)
2. We can answer some basic questions from the data summary even before cleaning it and diving deeper
mean(snowData$Year, na.rm=T) # Average year of car towed?: 2003
## [1] 2003.34
boxplot(snowData$Year, main = "Vehicle Year")

#jpeg('Vehicle.year.bp.jpeg')
#dev.off()
3. Clean the data
# From viewing the head of the dataset, it appears different towing
# companies had different naming conventions for Make, Model, & Color
# Those variables need to be cleaned up ie. "BLK" = "Black"
# The headers are clean and make sense! (except for the Time)
levels(snowData$Color) # What do we need to clean up?
## [1] "" "BLACK" "BLK" "BLK TAN" "BLU"
## [6] "BLUE" "BRN" "BRO" "BROWN" "BROWN GRAY"
## [11] "GOLD" "GRAY" "GREEN" "GRN" "GRY"
## [16] "GRY/BLK" "ORG" "PLE" "PUR" "PUR/GRY"
## [21] "RED" "RED WHT" "SILVER" "TAN" "UNKWN"
## [26] "WHI" "WHITE" "WHT" "YEL" "YELLOW"
snowData$Color[snowData$Color == "WHI"] <- "WHITE"
snowData$Color[snowData$Color == "WHT"] <- "WHITE"
snowData$Color[snowData$Color == "BLK"] <- "BLACK"
snowData$Color[snowData$Color == "BRN"] <- "BROWN"
snowData$Color[snowData$Color == "BRO"] <- "BROWN"
snowData$Color[snowData$Color == "YEL"] <- "YELLOW"
snowData$Color[snowData$Color == "GRY"] <- "GRAY"
snowData$Color[snowData$Color == "BLU"] <- "BLUE"
snowData$Color[snowData$Color == "GRN"] <- "GREEN"
snowData$Color[snowData$Color == "PUR"] <- "PURPLE"
snowData$Color[snowData$Color == "ORG"] <- "ORANGE"
levels(snowData$Make)# What do we need to clean up?
## [1] "" " MITS" "ACUR" "ACURA" "AUDI"
## [6] "BMW" "BUICK" "CADI" "CADILAC" "CADILLAC"
## [11] "CHEV" "CHEVR" "CHEVROLET" "CHEVY" "CHRY"
## [16] "CHRYSLER" "DODGE" "DODGW" "FORD" "FREIGHTLIN"
## [21] "FRHT" "GEO" "GMC" "HOND" "HONDA"
## [26] "HYNDAI" "HYUN" "HYUNDAI" "HYUNDIA" "INFI"
## [31] "INFINITI" "INFINITY" "JAGAR" "JEEP" "KIA"
## [36] "LEXS" "LEXUS" "LNDRV" "MAZDA" "MERC"
## [41] "MERCURY" "MITS" "MITSBUISHI" "MITSU" "MITSUBISHI"
## [46] "NISS" "NISSAN" "OLDS" "OLDSMOBILE" "PLYM"
## [51] "PLYMOTH" "PLYMOUTH" "PONT" "PONTIAC" "SAA"
## [56] "SATURN" "SCION" "SUBA" "SUBARU" "TOTY"
## [61] "TOYOTA" "TOYT" "VOLKSWAGON" "VOLV" "VOLVO"
## [66] "VW"
snowData$Make[snowData$Make == "ACUR"] <- "ACURA"
snowData$Make[snowData$Make == "CADI"] <- "CADILLAC"
snowData$Make[snowData$Make == "CADILAC"] <- "CADILLAC"
snowData$Make[snowData$Make == "CHEV"] <- "CHEVROLET"
snowData$Make[snowData$Make == "CHEVR"] <- "CHEVROLET"
snowData$Make[snowData$Make == "CHEVY"] <- "CHEVROLET"
snowData$Make[snowData$Make == "CHRY"] <- "CHRYSLER"
snowData$Make[snowData$Make == "DODGW"] <- "DODGE"
snowData$Make[snowData$Make == "HOND"] <- "HONDA"
snowData$Make[snowData$Make == "HYNDAI"] <- "HYUNDAI"
snowData$Make[snowData$Make == "HYUN"] <- "HYUNDAI"
snowData$Make[snowData$Make == "HYUNDIA"] <- "HYUNDAI"
snowData$Make[snowData$Make == "INFI"] <- "INFINITI"
snowData$Make[snowData$Make == "INFINITY"] <- "INFINITI"
snowData$Make[snowData$Make == "JAGAR"] <- "JAGUAR"
snowData$Make[snowData$Make == "LEXS"] <- "LEXUS"
snowData$Make[snowData$Make == "MERC"] <- "MERCURY"
snowData$Make[snowData$Make == " MITS"] <- "MITSUBISHI"
snowData$Make[snowData$Make == "MITS"] <- "MITSUBISHI"
snowData$Make[snowData$Make == "MITSBUISHI"] <- "MITSUBISHI"
snowData$Make[snowData$Make == " MITSU"] <- "MITSUBISHI"
snowData$Make[snowData$Make == "NISS"] <- "NISSAN"
snowData$Make[snowData$Make == "OLDS"] <- "OLDSMOBILE"
snowData$Make[snowData$Make == "PLYM"] <- "PLYMOUTH"
snowData$Make[snowData$Make == "PLYMOTH"] <- "PLYMOUTH"
snowData$Make[snowData$Make == "PONT"] <- "PONTIAC"
snowData$Make[snowData$Make == "SUBA"] <- "SUBARU"
snowData$Make[snowData$Make == "TOTY"] <- "TOYOTA"
snowData$Make[snowData$Make == "TOYT"] <- "TOYOTA"
snowData$Make[snowData$Make == "VOLV"] <- "VOLVO"
snowData$Make[snowData$Make == "VW"] <- "VOLKSWAGON"
# CLEAN.snowData is a nice subset of the variables for our inquiry!
CLEAN.snowData <- subset(snowData, select = c(Tow.Firm, Tow.Address, Make, Color, Year))
4. Analysis & Exploration
#####################################################################
# Which tow company been towing the most vehicles?
# Ideally some meta-data about the Tow.Firms would be very helpful
# in analyzing whether one company had more tow trucks on the road or
# more employees on that day!
#####################################################################
Tow <- count(CLEAN.snowData$Tow.Firm)
barplot(height = Tow$freq, names.arg = Tow$x, horiz=F,
main ="Which company has towed the most vehicles?", xlab ="Tow.Firm"
, ylab ="Count")

## FRIENDLY AUTO BODY & TOWING has towed the most vehicles
#####################################################################
# Is there any vehicle color that certain tow firms target?
# One could deduce that brighter colors stand out in the white snow
# more, but since these are contracters working on guidlines to
# tow vehicles that are deliquent in shoveling/moving thier cars
# i'd say all the cars are fair game, and that color popularity lies with
# vehicle buyers
######################################################################
ct = table(CLEAN.snowData$Color, CLEAN.snowData$Tow.Firm)
barplot(ct, las=2, legend = unique(CLEAN.snowData$Color))

#####################################################################
# Is there a parking location that tow firm target the most?
# This can be nicely visualized in a map with the ggmap package
#####################################################################
CLEAN.TA <- CLEAN.snowData$Tow.Address
map.image.address <- paste(CLEAN.TA, ", Hartford, CT")
geo <- geocode(location = map.image.address, output="latlon")
MAP <- ggmap(
get_map(location = 'Hartford', color="color", source="google",
maptype="roadmap", zoom=13),
extent ="device")+ geom_point(data = geo, aes(x = lon, y = lat), alpha=0.5)
MAP
