library(ggmap)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.4
# Load the crime records. stringsAsFactors is set explicitly because the
# default changed from TRUE to FALSE in R 4.0.0; the factor columns shown in
# the str() output below rely on the old behaviour.
CrimeStats <- read.csv("CrimeData.csv", stringsAsFactors = TRUE)
str(CrimeStats)
## 'data.frame':    29711 obs. of  12 variables:
##  $ X            : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Number       : Factor w/ 29711 levels "000061951901-000",..: 61 1389 36 11104 23873 22243 42 26 6186 39 ...
##  $ Category     : Factor w/ 9 levels "aggravated assault",..: 4 5 5 6 8 5 6 6 5 5 ...
##  $ Address      : Factor w/ 12270 levels "100XX N 11TH AVE",..: 8881 1112 8575 11994 859 5515 9958 79 5858 11061 ...
##  $ ZIP          : int  85031 85014 85014 85029 85006 85017 85043 85307 85018 85051 ...
##  $ Premises     : Factor w/ 73 levels "","07a storeroom/shed (commercial)",..: 6 52 6 63 63 52 63 63 36 38 ...
##  $ OccuranceDate: Factor w/ 19037 levels "2015-11-01 00:00:00",..: 1 1 1 1 1 1 1 1 2 2 ...
##  $ EndDate      : Factor w/ 15545 levels "2015-10-01 23:59:00",..: 46 844 21 5927 NA 12235 30 15 1410 NA ...
##  $ Duration     : Factor w/ 2460 levels "-109d -17H -17M 0S",..: 571 230 2415 2060 NA 474 332 1993 633 NA ...
##  $ Weekday      : Factor w/ 7 levels "Fri","Mon","Sat",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Month        : Factor w/ 6 levels "Apr","Dec","Feb",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ NA.          : int  0 0 0 0 0 0 0 0 0 0 ...
# Pull the ZIP code column out of the crime data and list its distinct values.
ZIP <- CrimeStats[["ZIP"]]
unique(ZIP)
##  [1] 85031 85014 85029 85006 85017 85043 85307 85018 85051 85027 85024
## [12] 85008 85254 85028 85015 85009 85339 85023 85044 85035 85041 85003
## [23] 85353 85048 85016 85022 85033 85040 85007 85019 85050 85004 85383
## [34] 85054 85032 85034 85013 85042 85012 85021 85053 85308 85086 85020
## [45] 85037 85085 85331 85306 85083 85310 85045 85087 85301 85253 85304
## [56] 85255 85251 85258 85225 85282 85268 85213 85395 85202 85201 85381
## [67] 85281 85323 85224 85345 85302 85266 85382 85392 85303 85260
# Distinct ZIP codes, in order of first appearance in the crime data.
UniqueZip <- ZIP[!duplicated(ZIP)]
str(UniqueZip)
##  int [1:76] 85031 85014 85029 85006 85017 85043 85307 85018 85051 85027 ...
# Read the ZIP code reference table and show which columns it provides.
ZipCodes <- read.csv(file = "free-zipcode-database-Primary.csv")
colnames(ZipCodes)
##  [1] "Zipcode"             "ZipCodeType"         "City"               
##  [4] "State"               "LocationType"        "Lat"                
##  [7] "Long"                "Location"            "Decommisioned"      
## [10] "TaxReturnsFiled"     "EstimatedPopulation" "TotalWages"
# Keep only the reference rows for ZIP codes that occur in the crime data,
# and only the columns needed to locate them.
ZipCodesInfo <- ZipCodes[
  ZipCodes$Zipcode %in% UniqueZip,
  c("Zipcode", "State", "Lat", "Long")
]

# ZipCodesInfo holds the latitude and longitude for each ZIP code; copy those
# coordinates onto every crime record.

# Store the ZIP codes as factors in both tables, then look up each record's
# row in ZipCodesInfo once and take both coordinate columns from that row.
ZipCodesInfo$Zipcode <- as.factor(ZipCodesInfo$Zipcode)
CrimeStats$ZIP <- as.factor(CrimeStats$ZIP)
CrimeStats[c("lat", "long")] <- ZipCodesInfo[
  match(CrimeStats$ZIP, ZipCodesInfo$Zipcode),
  c("Lat", "Long")
]
str(CrimeStats)
## 'data.frame':    29711 obs. of  14 variables:
##  $ X            : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Number       : Factor w/ 29711 levels "000061951901-000",..: 61 1389 36 11104 23873 22243 42 26 6186 39 ...
##  $ Category     : Factor w/ 9 levels "aggravated assault",..: 4 5 5 6 8 5 6 6 5 5 ...
##  $ Address      : Factor w/ 12270 levels "100XX N 11TH AVE",..: 8881 1112 8575 11994 859 5515 9958 79 5858 11061 ...
##  $ ZIP          : Factor w/ 76 levels "85003","85004",..: 23 9 9 22 3 12 32 64 13 37 ...
##  $ Premises     : Factor w/ 73 levels "","07a storeroom/shed (commercial)",..: 6 52 6 63 63 52 63 63 36 38 ...
##  $ OccuranceDate: Factor w/ 19037 levels "2015-11-01 00:00:00",..: 1 1 1 1 1 1 1 1 2 2 ...
##  $ EndDate      : Factor w/ 15545 levels "2015-10-01 23:59:00",..: 46 844 21 5927 NA 12235 30 15 1410 NA ...
##  $ Duration     : Factor w/ 2460 levels "-109d -17H -17M 0S",..: 571 230 2415 2060 NA 474 332 1993 633 NA ...
##  $ Weekday      : Factor w/ 7 levels "Fri","Mon","Sat",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Month        : Factor w/ 6 levels "Apr","Dec","Feb",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ NA.          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ lat          : num  33.5 33.5 33.5 33.6 33.5 ...
##  $ long         : num  -112 -112 -112 -112 -112 ...
# List the distinct latitudes that were attached to the crime records.
unique(CrimeStats[["lat"]])
##  [1] 33.49 33.50 33.59 33.46 33.42 33.53 33.55 33.72 33.68 33.61 33.56
## [12] 33.44 33.30 33.63 33.34 33.47 33.38 33.45 33.28 33.62 33.40 33.69
## [23] 33.76 33.67 33.43 33.37 33.66 33.83 33.57 33.48 33.75 33.80 33.73
## [34] 33.71 33.95 33.54 33.32 33.60 33.65
#Comment: latitude and longitude vary only slightly across the ZIP codes

# Fetch a hybrid base map centred on the mean crime location. na.rm = TRUE
# keeps the centre defined even if some records have no coordinates (a ZIP
# code missing from the lookup table yields NA for lat and long).
mapPhoenix <- get_map(
  location = c(long = mean(CrimeStats$long, na.rm = TRUE),
               lat = mean(CrimeStats$lat, na.rm = TRUE)),
  zoom = 9, maptype = "hybrid", scale = 2
)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=33.505619,-112.092044&zoom=9&size=640x640&scale=2&maptype=hybrid&language=en-EN&sensor=false
# Overlay one point per crime record on the base map. fill and alpha are
# constants, so they are set outside aes(): inside aes() they would be treated
# as data and mapped through the default scales instead of being used
# literally. With nothing mapped to fill/alpha/size there are no legends to
# suppress, so no guides() call is needed.
ggmap(mapPhoenix) +
  geom_point(data = CrimeStats,
             aes(x = long, y = lat),
             fill = "red", alpha = 0.8, size = 2, shape = 21)

#It would be nice to make the map interactive, so that details can be shown for each plotted point
#Idea: color each point by the crime category with the most crimes at that location