A quick Exploratory look into the ‘Towed Vehicles’ Data set from Feb. 2nd 2015 - Present due to snow conditions in Hartford, CT

1. Get the Data

library(RSocrata)
library(ggplot2)
library(plyr)
library(ggmap)
# read the data from the url provided
# snowData <- read.socrata("https://data.hartford.gov/Public-Safety/Towed-Vehicles-From-02012015/3acs-ahvq")
snowData <- read.csv("Towed_Vehicles_From_02012015.csv", header=T, sep=",")
h.snowData <- head(snowData)
h.snowData
##   TowNum         Tow.Address                      Tow.Firm
## 1 304069       50 WILLARD ST                CENTRAL GARAGE
## 2 304070        142 CLARK ST                    A & N AUTO
## 3 304078    80 POPE PARK HWY        CAPITOL AUTOMOTIVE LLC
## 4 304079    80 POPE PARK HWY CENTRAL AUTO & TRANSPORT, LLC
## 5 304083   39 CHARTER OAK PL CENTRAL AUTO & TRANSPORT, LLC
## 6 304084   20 CHARTER OAK PL CENTRAL AUTO & TRANSPORT, LLC
##                         Firm.Address   Firm.Phone Vehicle.State
## 1      36 BOND ST. HARTFORD,CT 06114 860-296-1426            CT
## 2     1516,PARK ST,HARTFORD,CT,06106 860-233-7079            CT
## 3  971 CAPITOL AV. HARTFORD,CT 06106 860-236-2602            CT
## 4   195 MAXIM RD. HARTFORD, CT 06114 860-246-7616              
## 5   195 MAXIM RD. HARTFORD, CT 06114 860-246-7616            CT
## 6   195 MAXIM RD. HARTFORD, CT 06114 860-246-7616            CT
##   Vehicle.Plate Year      Make   Model Color    Date Time
## 1        512YPD 2005     ACURA      RL  GRAY 2012015  333
## 2        9ARHP4 1996   MERCURY MARQUIS   TAN 2012015  342
## 3        9ALSE5 2006     HONDA ODYSSEY  BLUE 2012015  835
## 4               2010 CHEVROLET  CAMARO BLACK 2012015  835
## 5        2AFMB5 2004       KIA  OPTIMA   RED 2012015   15
## 6        820RWH 2002    NISSAN  MAXIMA   TAN 2012015   16
sum.snowData <- summary(snowData)

2. We can answer some basic questions from the data summary even before cleaning it and diving deeper

mean(snowData$Year, na.rm=T) # Average year of car towed?: 2003 
## [1] 2003.34
boxplot(snowData$Year, main = "Vehicle Year")

#jpeg('Vehicle.year.bp.jpeg')
#dev.off()

3. Clean the data

# From viewing the head of the dataset, it appears different towing 
# companies had different naming conventions for Make, Model, & Color
# Those variables need to be cleaned up ie. "BLK" = "Black"
# The headers are clean and make sense! (except for the Time)

levels(snowData$Color) # What do we need to clean up?
##  [1] ""           "BLACK"      "BLK"        "BLK TAN"    "BLU"       
##  [6] "BLUE"       "BRN"        "BRO"        "BROWN"      "BROWN GRAY"
## [11] "GOLD"       "GRAY"       "GREEN"      "GRN"        "GRY"       
## [16] "GRY/BLK"    "ORG"        "PLE"        "PUR"        "PUR/GRY"   
## [21] "RED"        "RED WHT"    "SILVER"     "TAN"        "UNKWN"     
## [26] "WHI"        "WHITE"      "WHT"        "YEL"        "YELLOW"
snowData$Color[snowData$Color == "WHI"] <- "WHITE"
snowData$Color[snowData$Color == "WHT"] <- "WHITE"
snowData$Color[snowData$Color == "BLK"] <- "BLACK"
snowData$Color[snowData$Color == "BRN"] <- "BROWN"
snowData$Color[snowData$Color == "BRO"] <- "BROWN"
snowData$Color[snowData$Color == "YEL"] <- "YELLOW"
snowData$Color[snowData$Color == "GRY"] <- "GRAY"
snowData$Color[snowData$Color == "BLU"] <- "BLUE"
snowData$Color[snowData$Color == "GRN"] <- "GREEN"
snowData$Color[snowData$Color == "PUR"] <- "PURPLE"
snowData$Color[snowData$Color == "ORG"] <- "ORANGE"

levels(snowData$Make)# What do we need to clean up?
##  [1] ""           " MITS"      "ACUR"       "ACURA"      "AUDI"      
##  [6] "BMW"        "BUICK"      "CADI"       "CADILAC"    "CADILLAC"  
## [11] "CHEV"       "CHEVR"      "CHEVROLET"  "CHEVY"      "CHRY"      
## [16] "CHRYSLER"   "DODGE"      "DODGW"      "FORD"       "FREIGHTLIN"
## [21] "FRHT"       "GEO"        "GMC"        "HOND"       "HONDA"     
## [26] "HYNDAI"     "HYUN"       "HYUNDAI"    "HYUNDIA"    "INFI"      
## [31] "INFINITI"   "INFINITY"   "JAGAR"      "JEEP"       "KIA"       
## [36] "LEXS"       "LEXUS"      "LNDRV"      "MAZDA"      "MERC"      
## [41] "MERCURY"    "MITS"       "MITSBUISHI" "MITSU"      "MITSUBISHI"
## [46] "NISS"       "NISSAN"     "OLDS"       "OLDSMOBILE" "PLYM"      
## [51] "PLYMOTH"    "PLYMOUTH"   "PONT"       "PONTIAC"    "SAA"       
## [56] "SATURN"     "SCION"      "SUBA"       "SUBARU"     "TOTY"      
## [61] "TOYOTA"     "TOYT"       "VOLKSWAGON" "VOLV"       "VOLVO"     
## [66] "VW"
snowData$Make[snowData$Make == "ACUR"] <- "ACURA"
snowData$Make[snowData$Make == "CADI"] <- "CADILLAC"
snowData$Make[snowData$Make == "CADILAC"] <- "CADILLAC"
snowData$Make[snowData$Make == "CHEV"] <- "CHEVROLET"
snowData$Make[snowData$Make == "CHEVR"] <- "CHEVROLET"
snowData$Make[snowData$Make == "CHEVY"] <- "CHEVROLET"
snowData$Make[snowData$Make == "CHRY"] <- "CHRYSLER"
snowData$Make[snowData$Make == "DODGW"] <- "DODGE"
snowData$Make[snowData$Make == "HOND"] <- "HONDA"
snowData$Make[snowData$Make == "HYNDAI"] <- "HYUNDAI"
snowData$Make[snowData$Make == "HYUN"] <- "HYUNDAI"
snowData$Make[snowData$Make == "HYUNDIA"] <- "HYUNDAI"
snowData$Make[snowData$Make == "INFI"] <- "INFINITI"
snowData$Make[snowData$Make == "INFINITY"] <- "INFINITI"
snowData$Make[snowData$Make == "JAGAR"] <- "JAGUAR"
snowData$Make[snowData$Make == "LEXS"] <- "LEXUS"
snowData$Make[snowData$Make == "MERC"] <- "MERCURY"
snowData$Make[snowData$Make == " MITS"] <- "MITSUBISHI"
snowData$Make[snowData$Make == "MITS"] <- "MITSUBISHI"
snowData$Make[snowData$Make == "MITSBUISHI"] <- "MITSUBISHI"
snowData$Make[snowData$Make == " MITSU"] <- "MITSUBISHI"
snowData$Make[snowData$Make == "NISS"] <- "NISSAN"
snowData$Make[snowData$Make == "OLDS"] <- "OLDSMOBILE"
snowData$Make[snowData$Make == "PLYM"] <- "PLYMOUTH"
snowData$Make[snowData$Make == "PLYMOTH"] <- "PLYMOUTH"
snowData$Make[snowData$Make == "PONT"] <- "PONTIAC"
snowData$Make[snowData$Make == "SUBA"] <- "SUBARU"
snowData$Make[snowData$Make == "TOTY"] <- "TOYOTA"
snowData$Make[snowData$Make == "TOYT"] <- "TOYOTA"
snowData$Make[snowData$Make == "VOLV"] <- "VOLVO"
snowData$Make[snowData$Make == "VW"] <- "VOLKSWAGON"

# CLEAN.snowData is a nice subset of the variables for our inquiry!
CLEAN.snowData <- subset(snowData, select = c(Tow.Firm, Tow.Address, Make, Color, Year))

4. Analysis & Exploration

#####################################################################
# Which tow company been towing the most vehicles?
# Ideally some meta-data about the Tow.Firms would be very helpful
# in analyzing whether one company had more tow trucks on the road or
# more employees on that day!
#####################################################################
Tow <- count(CLEAN.snowData$Tow.Firm)

barplot(height = Tow$freq, names.arg = Tow$x, horiz=F, 
        main ="Which company has towed the most vehicles?", xlab ="Tow.Firm"
        , ylab ="Count")

## FRIENDLY AUTO BODY & TOWING has towed the most vehicles


#####################################################################
# Is there any vehicle color that certain tow firms target?
# One could deduce that brighter colors stand out in the white snow
# more, but since these are contracters working on guidlines to 
# tow vehicles that are deliquent in shoveling/moving thier cars
# i'd say all the cars are fair game, and that color popularity lies with
# vehicle buyers
######################################################################
ct = table(CLEAN.snowData$Color, CLEAN.snowData$Tow.Firm)
barplot(ct, las=2, legend = unique(CLEAN.snowData$Color))

#####################################################################
# Is there a parking location that tow firm target the most?
# This can be nicely visualized in a map with the ggmap package
#####################################################################
CLEAN.TA <- CLEAN.snowData$Tow.Address
map.image.address <- paste(CLEAN.TA, ", Hartford, CT")
geo <- geocode(location = map.image.address, output="latlon")

MAP <- ggmap(
  get_map(location = 'Hartford', color="color", source="google", 
              maptype="roadmap", zoom=13), 
  extent ="device")+ geom_point(data = geo, aes(x = lon, y = lat), alpha=0.5)
MAP