Load Datasets

# Data 1: median house prices (Zillow ZHVI, all homes, by city).
# check.names = FALSE keeps the month headers such as "1996-04" as they are
# instead of letting read.csv() rewrite them into syntactic names.
data1h <- read.csv(
  "C:/Users/illya/Desktop/DATA 205 Spring 2020/Datasets from Zillow/city_Zhvi_AllHomes.csv",
  check.names = FALSE
)

# Data 2: police dispatched incidents.
data2p <- read.csv(
  "C:/Users/illya/Desktop/DATA 205 Spring 2020/Datasets from dataMontgomery/Police_Dispatched_Incidents.csv"
)

# Data 3: population of Montgomery County.
data3pop <- read.csv(
  "C:/Users/illya/Desktop/DATA 205 Spring 2020/Datasets from Census Bureau/mcpopulation.csv"
)

Cleaning and Filtering Datasets

Information of datasets

# Print the column types and leading values of the house-price table.
str(object = data1h)
## 'data.frame':    27539 obs. of  293 variables:
##  $ RegionID  : int  6181 12447 39051 17426 6915 13271 40326 18959 54296 38128 ...
##  $ RegionName: Factor w/ 17740 levels "Aaronsburg","Abbeville",..: 10709 8814 7060 2696 13581 11979 11991 8284 13589 3615 ...
##  $ State     : Factor w/ 51 levels "AK","AL","AR",..: 35 5 44 15 44 39 4 34 5 44 ...
##  $ Metro     : Factor w/ 861 levels "","Aberdeen",..: 547 453 356 152 682 606 607 429 683 194 ...
##  $ CountyName: Factor w/ 1752 levels "Abbeville County",..: 1282 928 683 373 141 1219 964 326 1396 417 ...
##  $ SizeRank  : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ 1996-04   : int  194476 183563 94196 129731 93846 69890 99349 149168 191090 101565 ...
##  $ 1996-05   : int  194292 183612 94351 129410 93783 69779 99943 149330 190767 101951 ...
##  $ 1996-06   : int  194160 183708 94465 129528 93769 69678 100485 149353 190519 102204 ...
##  $ 1996-07   : int  194088 183868 94512 129381 93768 69635 100997 149530 190255 102201 ...
##  $ 1996-08   : int  193954 183832 94606 129759 93869 69563 101482 149923 189946 102364 ...
##  $ 1996-09   : int  193912 183668 94824 130274 93989 69556 101952 150387 189663 102440 ...
##  $ 1996-10   : int  193982 183342 95090 131018 94098 69550 102403 150807 189608 102791 ...
##  $ 1996-11   : int  194136 183207 95407 131858 94234 69675 102864 150977 189603 102998 ...
##  $ 1996-12   : int  194395 183082 95735 132434 94349 69819 103347 151331 189467 103335 ...
##  $ 1997-01   : int  194783 182499 95675 133035 94236 69968 103834 151672 189186 103375 ...
##  $ 1997-02   : int  195773 181932 95429 133566 94112 70057 104315 152214 189148 103552 ...
##  $ 1997-03   : int  196821 181534 95216 134237 94025 70143 104748 152471 189422 103469 ...
##  $ 1997-04   : int  197777 181714 95209 134351 94147 70241 105157 153007 189937 103533 ...
##  $ 1997-05   : int  198234 181949 95230 135061 94234 70360 105580 153376 190359 103501 ...
##  $ 1997-06   : int  198817 182174 95357 135104 94267 70462 106007 153752 190733 103692 ...
##  $ 1997-07   : int  199507 182627 95564 135403 94264 70554 106409 153908 191458 104090 ...
##  $ 1997-08   : int  200418 183536 95751 134511 94305 70572 106804 153841 192530 104365 ...
##  $ 1997-09   : int  201334 184480 95818 133187 94355 70529 107267 153937 193991 104805 ...
##  $ 1997-10   : int  202246 185628 95866 131296 94459 70455 107761 154075 195195 105082 ...
##  $ 1997-11   : int  203408 186510 96005 129590 94513 70346 108240 154527 196645 105449 ...
##  $ 1997-12   : int  204616 187846 96148 128761 94586 70216 108686 154891 198429 105696 ...
##  $ 1998-01   : int  205901 189758 96785 128405 95104 70102 109215 155517 200814 106076 ...
##  $ 1998-02   : int  206513 192031 97451 128349 95636 70080 109762 156095 203226 106443 ...
##  $ 1998-03   : int  207017 194072 98163 128558 96156 70114 110288 156727 205397 106909 ...
##  $ 1998-04   : int  207400 195489 98444 128914 96175 70118 110744 156986 207390 107005 ...
##  $ 1998-05   : int  208200 197007 98742 129251 96347 70104 111222 157149 209408 107298 ...
##  $ 1998-06   : int  209030 198384 98962 129622 96547 70133 111717 157643 211811 107453 ...
##  $ 1998-07   : int  209813 199332 99196 130258 96833 70078 112344 158015 213916 107944 ...
##  $ 1998-08   : int  210858 200495 99365 131871 96923 70289 113048 158389 216078 108277 ...
##  $ 1998-09   : int  212042 202398 99553 134265 96995 70503 113714 158422 217979 108730 ...
##  $ 1998-10   : int  213492 204595 99740 137388 97056 70856 114364 158714 220197 108666 ...
##  $ 1998-11   : int  214692 206821 99684 140116 97165 70946 115044 159094 222170 108825 ...
##  $ 1998-12   : int  215924 208454 99616 142155 97332 71098 115727 159297 224188 108811 ...
##  $ 1999-01   : int  216990 210458 99669 143293 97334 71208 116195 159135 225865 109493 ...
##  $ 1999-02   : int  218352 212274 100221 144283 97486 71391 116636 158941 227600 110245 ...
##  $ 1999-03   : int  219798 214148 101040 145085 97697 71505 117160 158985 229220 111236 ...
##  $ 1999-04   : int  221670 215837 101855 146033 98212 71677 117847 159280 231047 112192 ...
##  $ 1999-05   : int  223666 217501 102571 146729 98884 71744 118581 159769 233155 112946 ...
##  $ 1999-06   : int  225814 219870 103192 147993 99499 71861 119570 160398 235049 113807 ...
##  $ 1999-07   : int  228303 222541 103809 149427 100051 72036 120489 161616 237383 114400 ...
##  $ 1999-08   : int  230542 224337 104419 150603 100361 72154 121371 162667 239452 115003 ...
##  $ 1999-09   : int  232806 225471 104877 151250 100698 72265 122112 163672 241510 115138 ...
##  $ 1999-10   : int  234645 226607 105277 151677 101014 72276 122860 164263 243795 115776 ...
##  $ 1999-11   : int  236424 228399 105699 152519 101387 72520 123523 164958 246429 116345 ...
##  $ 1999-12   : int  238239 230173 106183 153542 101755 72714 124197 165627 249440 117184 ...
##  $ 2000-01   : int  240213 231907 106603 155208 102026 73043 125080 166359 252731 117487 ...
##  $ 2000-02   : int  242890 233428 106744 156900 102207 73349 126053 166938 255930 117993 ...
##  $ 2000-03   : int  245938 235370 106688 158672 102276 73719 127004 167585 259241 118165 ...
##  $ 2000-04   : int  248992 237384 106729 160246 102317 74059 127944 168355 262300 118383 ...
##  $ 2000-05   : int  251844 239689 106746 161908 101673 74602 128880 169115 265451 118511 ...
##  $ 2000-06   : int  254521 241675 106931 163457 101031 75079 129594 169741 269729 118897 ...
##  $ 2000-07   : int  257230 243587 107119 164882 100387 75557 130333 170045 274298 119400 ...
##  $ 2000-08   : int  259619 245877 107486 166289 100390 75834 131046 170501 279185 119893 ...
##  $ 2000-09   : int  262125 248261 107874 167704 100388 76198 131938 171147 283707 120536 ...
##  $ 2000-10   : int  264876 250356 108145 169134 100374 76624 132745 171918 287728 121080 ...
##  $ 2000-11   : int  268108 252344 108653 170798 100299 77039 133558 172734 291906 121612 ...
##  $ 2000-12   : int  270980 254471 109147 172528 100205 77485 134392 173516 295849 122126 ...
##  $ 2001-01   : int  273717 256491 109399 174075 100102 77860 135224 174420 299659 122554 ...
##  $ 2001-02   : int  276310 258626 109434 175352 99918 78226 136046 175532 304110 122222 ...
##  $ 2001-03   : int  279798 260693 109109 176757 99691 78603 136845 176544 309071 122233 ...
##  $ 2001-04   : int  283250 263180 108989 178333 99402 79055 137580 177444 314667 122351 ...
##  $ 2001-05   : int  286337 265359 109011 180038 99418 79387 138313 178309 319181 122938 ...
##  $ 2001-06   : int  288555 267692 109180 181611 99471 79674 139024 179279 322391 122787 ...
##  $ 2001-07   : int  290646 269654 109320 182989 99585 79925 139708 180318 324329 122679 ...
##  $ 2001-08   : int  292892 271819 109203 184440 99485 80336 140358 181385 326356 122757 ...
##  $ 2001-09   : int  294933 273835 109314 185937 99345 80704 141031 182464 328856 123009 ...
##  $ 2001-10   : int  297101 276547 109616 187583 99257 81040 141754 183503 331819 123201 ...
##  $ 2001-11   : int  299652 279092 109878 189057 99131 81402 142574 184299 334966 123275 ...
##  $ 2001-12   : int  302686 281955 110084 190259 99085 81806 143294 185223 338036 123400 ...
##  $ 2002-01   : int  305858 284728 110256 191466 99058 82223 143947 186082 341873 123417 ...
##  $ 2002-02   : int  307816 288005 110567 192779 99170 82604 144482 186914 345309 123799 ...
##  $ 2002-03   : int  307467 291418 111097 194367 99431 83081 145114 187675 348674 123937 ...
##  $ 2002-04   : int  306729 295210 111719 195568 99767 83539 145709 188600 351871 124326 ...
##  $ 2002-05   : int  306738 298915 112270 196874 100153 84056 146265 189413 356040 124627 ...
##  $ 2002-06   : int  308624 303068 112773 197933 100474 84509 146880 190081 361062 125441 ...
##  $ 2002-07   : int  310849 307730 113229 199499 100777 85017 147489 190616 366930 126270 ...
##  $ 2002-08   : int  313869 312648 113841 201032 101098 85470 148187 191348 373271 126970 ...
##  $ 2002-09   : int  317123 317708 114334 202795 101418 86055 148730 192110 379093 127610 ...
##  $ 2002-10   : int  320162 322922 114695 204260 101824 86794 149370 193341 385289 128115 ...
##  $ 2002-11   : int  322241 328630 115013 205439 102384 87562 150050 194674 391111 128665 ...
##  $ 2002-12   : int  324349 334233 115249 206840 102845 88259 150859 196154 399610 129182 ...
##  $ 2003-01   : int  326472 339255 115622 208239 103005 88957 151705 197569 407765 129340 ...
##  $ 2003-02   : int  329766 343755 115739 210001 102786 89817 152516 198984 415147 129409 ...
##  $ 2003-03   : int  334245 348753 115791 211456 102659 90726 153239 199971 420816 129273 ...
##  $ 2003-04   : int  339404 353121 115814 213137 102641 91688 153994 200837 425579 129296 ...
##  $ 2003-05   : int  344113 358042 115894 214274 102922 92503 154694 201831 430663 129414 ...
##  $ 2003-06   : int  348769 362118 116236 215579 103226 93350 155329 203050 434779 129651 ...
##  $ 2003-07   : int  352307 367913 116408 216708 103652 94100 155746 204493 440473 129816 ...
##  $ 2003-08   : int  355559 374026 116790 218225 104096 94892 156233 206342 445696 129994 ...
##  $ 2003-09   : int  357704 380456 116961 219760 104427 95793 156997 209188 452030 130277 ...
##  $ 2003-10   : int  360184 385695 117294 221534 104717 96754 157894 212628 457335 130818 ...
##  $ 2003-11   : int  362922 392074 117539 223244 104761 97780 158756 216753 463578 131202 ...
##  $ 2003-12   : int  365774 399205 117903 224947 105099 98724 159580 220570 464726 131508 ...
##   [list output truncated]
# Store the "Priority" column as character strings instead of integers,
# then print the structure of the incidents table.
data2p[["Priority"]] <- as.character(data2p[["Priority"]])
str(object = data2p)
## 'data.frame':    642435 obs. of  26 variables:
##  $ Incident_ID           : Factor w/ 642435 levels "P1700500000",..: 1 2 4 5 540887 8 562621 11 12 537330 ...
##  $ Crime.Reports         : Factor w/ 99508 levels "","170500041",..: 1 1 1 1 1 1 1 2 1 1 ...
##  $ Crash.Reports         : Factor w/ 23951 levels "","170500026",..: 1 1 1 1 20089 1 1 1 1 1 ...
##  $ Start.Time            : Factor w/ 510889 levels "1/1/2018 0:01",..: 258428 258429 258392 258393 475684 258396 62887 258399 258400 509477 ...
##  $ End.Time              : Factor w/ 505708 levels "","1/1/2018 0:00",..: 256348 256349 256323 256313 470969 256314 62480 256320 256319 504286 ...
##  $ Priority              : chr  "4" "1" "4" "2" ...
##  $ Initial.Type          : Factor w/ 177 levels "ABDUCTION (KIDNAPPING) - CUSTODIAL ABDUCTION, HOSTAGE SITUAT",..: 97 12 96 61 158 96 158 57 61 37 ...
##  $ Close.Type            : Factor w/ 176 levels "10 SIGNAL ALARM",..: 99 12 98 63 158 98 158 59 63 38 ...
##  $ Address               : Factor w/ 39448 levels "","1  BLK  16TH ST",..: 12811 21298 1328 5829 29612 1328 27238 15306 1 22607 ...
##  $ City                  : Factor w/ 51 levels "","ADELPHI","ALEXANDRIA",..: 46 46 44 44 26 44 46 33 37 18 ...
##  $ State                 : Factor w/ 6 levels "","DC","DCMD",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Zip                   : Factor w/ 221 levels "","20001","20002",..: 118 123 89 92 104 89 120 114 110 101 ...
##  $ Longitude             : num  -77 -77 -77.2 -77.1 -77.2 ...
##  $ Latitude              : num  39.1 39 39.1 39.1 39.2 ...
##  $ Police.District.Number: Factor w/ 9 levels "1D","2D","3D",..: 4 3 1 4 5 1 3 2 6 5 ...
##  $ Beat                  : Factor w/ 41 levels "","A1","A2","A3",..: 27 18 4 26 29 4 21 11 39 30 ...
##  $ PRA                   : Factor w/ 785 levels "","0","1","10",..: 487 12 175 477 412 175 55 239 361 575 ...
##  $ CallTime.CallRoute    : int  45 451 201 98 99 57 92 195 141 0 ...
##  $ Calltime.Dispatch     : int  68 768 218 249 118 306 282 2882 451 77 ...
##  $ Calltime.Arrive       : int  NA 1022 267 992 347 NA 627 3567 878 604 ...
##  $ Calltime.Cleared      : int  93 1236 18846 1278 4296 556 1801 6426 1393 628 ...
##  $ CallRoute.Dispatch    : int  22 316 17 150 18 248 189 2686 309 77 ...
##  $ Dispatch.Arrive       : int  NA 253 48 743 228 NA 345 685 427 527 ...
##  $ Arrive.Cleared        : int  NA 214 18579 286 3949 NA 1174 2859 515 24 ...
##  $ Disposition.Desc      : Factor w/ 459 levels "","ABANDONED LIVESTOCK",..: 336 44 336 424 188 226 188 409 259 336 ...
##  $ Location              : Factor w/ 36942 levels "","POINT (-76.8682 39.0762)",..: 8127 6227 21679 13952 31434 21679 1668 12081 27899 27795 ...
# Print the column types and leading values of the population table.
str(object = data3pop)
## 'data.frame':    30 obs. of  5 variables:
##  $ City       : Factor w/ 30 levels "ASHTON-SANDY SPRING",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ State      : Factor w/ 1 level "MD": 1 1 1 1 1 1 1 1 1 1 ...
##  $ County     : Factor w/ 1 level "Montgomery County": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Census.2000: int  3437 161 115 15690 55277 NA 343 120 1734 16427 ...
##  $ Census.2010: int  5628 172 214 16772 60858 10460 321 134 2280 16660 ...

Remove NA values and unnecessary columns and rows

Data1: Median House Price from 01/2017 to 12/2019

# Keep the three identifying columns plus the monthly price columns for
# 2017-01 through 2019-12. Columns are selected by name rather than by
# position, so the result does not shift if the source file gains or loses
# month columns. Original column order is preserved.
data1h <- data1h[
  names(data1h) %in% c("RegionName", "State", "CountyName") |
    grepl("^201[7-9]-(0[1-9]|1[0-2])$", names(data1h))
]
dim(data1h)
## [1] 27539    39
# Preview the first rows of the trimmed house-price table.
head(x = data1h)
##     RegionName State          CountyName 2017-01 2017-02 2017-03 2017-04
## 1     New York    NY       Queens County  584554  587374  590257  592355
## 2  Los Angeles    CA  Los Angeles County  626685  629700  633282  635542
## 3      Houston    TX       Harris County  169343  169777  170364  171320
## 4      Chicago    IL         Cook County  229084  230065  231943  233286
## 5  San Antonio    TX        Bexar County  159974  161667  162749  163152
## 6 Philadelphia    PA Philadelphia County  145257  145435  145489  146713
##   2017-05 2017-06 2017-07 2017-08 2017-09 2017-10 2017-11 2017-12 2018-01
## 1  595788  599291  603820  607738  611763  616309  619990  622861  625073
## 2  640115  643978  648564  652647  656640  660186  666312  672414  678260
## 3  171917  172378  172628  172850  173191  173616  174125  174551  175140
## 4  234326  234756  235202  236139  237387  238218  238754  239760  241448
## 5  163373  163943  164820  165871  166885  167955  168964  169341  169216
## 6  148192  149284  150178  150986  151974  153515  154850  156270  157617
##   2018-02 2018-03 2018-04 2018-05 2018-06 2018-07 2018-08 2018-09 2018-10
## 1  628962  632881  636989  640001  643186  645492  648308  651418  653854
## 2  683612  689579  693796  695838  697236  700288  702764  704865  706364
## 3  175609  176262  177303  178418  179290  180016  180912  181981  182788
## 4  242646  242589  242554  243004  243961  244435  244794  245413  246119
## 5  169119  170076  171252  172503  173370  174291  175125  175441  176028
## 6  159632  161964  163759  165033  165917  167216  168577  169961  170934
##   2018-11 2018-12 2019-01 2019-02 2019-03 2019-04 2019-05 2019-06 2019-07
## 1  656243  657655  658328  658200  658150  658181  658154  657709  657309
## 2  708192  708731  709587  706204  702990  700113  701035  702932  704227
## 3  183556  184265  185482  186383  186962  187018  187323  187833  188477
## 4  246857  247150  246646  246233  246214  246644  246922  246991  247258
## 5  176340  177668  178605  179998  180492  181318  181894  182653  183288
## 6  172218  173440  175027  176041  176695  177189  178084  179267  180296
##   2019-08 2019-09 2019-10 2019-11 2019-12
## 1  656607  655407  654263  653269  653086
## 2  706438  709487  713791  717913  724102
## 3  189086  189477  190036  190412  190829
## 4  247325  247065  246830  246605  246497
## 5  183869  184765  185365  186050  186132
## 6  181277  182176  182985  183584  184017
# Restrict the house-price table to Montgomery County, Maryland.
data1h <- filter(data1h, State == "MD", CountyName == "Montgomery County")

# Name the place column "city".
names(data1h) <- replace(names(data1h), names(data1h) == "RegionName", "city")

# Relabel "Sandy Spring" as "Ashton-Sandy Spring".
data1h[["city"]] <- gsub(
  pattern = "Sandy Spring",
  replacement = "Ashton-Sandy Spring",
  x = data1h[["city"]]
)
head(x = data1h)
##             city State        CountyName 2017-01 2017-02 2017-03 2017-04
## 1  Silver Spring    MD Montgomery County  390620  392487  395550  396275
## 2     Germantown    MD Montgomery County  291883  293267  295705  296331
## 3       Bethesda    MD Montgomery County  896903  898448  903592  904371
## 4   Gaithersburg    MD Montgomery County  368529  370345  373394  374416
## 5      Rockville    MD Montgomery County  501038  503002  506782  507513
## 6 North Bethesda    MD Montgomery County  499352  500774  504089  504867
##   2017-05 2017-06 2017-07 2017-08 2017-09 2017-10 2017-11 2017-12 2018-01
## 1  396810  397301  398409  399917  400649  401799  403398  404867  403991
## 2  296689  297053  298068  299153  299483  299749  300672  301461  300785
## 3  905178  905056  905350  905851  904372  906099  908490  912808  911467
## 4  375105  375409  376512  377779  378432  378835  379742  380717  379476
## 5  508353  508382  509133  509784  509574  509880  510939  511967  510069
## 6  505304  505726  506854  507860  507751  508018  509023  510024  508005
##   2018-02 2018-03 2018-04 2018-05 2018-06 2018-07 2018-08 2018-09 2018-10
## 1  405137  406518  409937  411555  413016  414837  416193  417308  417280
## 2  301398  302329  304773  305909  306714  307552  308227  308794  308906
## 3  915507  918452  924509  925451  926120  928273  929363  930919  929842
## 4  379885  380437  383072  383925  384539  385236  385666  386160  386305
## 5  510892  512176  516266  517852  519130  520562  521655  522614  522285
## 6  508707  509643  513096  514447  515259  516275  516882  517742  517646
##   2018-11 2018-12 2019-01 2019-02 2019-03 2019-04 2019-05 2019-06 2019-07
## 1  417085  418167  421278  422691  422709  421560  422156  422926  423173
## 2  308767  309579  311669  312509  312226  311086  311325  311863  312191
## 3  929418  929906  935502  934825  932861  928090  928897  930650  931695
## 4  386470  387711  390667  391811  391679  390188  390592  391444  391945
## 5  521718  522561  526000  526753  525500  522429  521891  521992  521660
## 6  517227  518516  522371  523718  523059  520663  520787  521513  521859
##   2019-08 2019-09 2019-10 2019-11 2019-12
## 1  423568  424269  425641  427091  427807
## 2  312607  313284  314321  315430  316055
## 3  933655  935748  938883  941983  944549
## 4  392552  393065  393931  394714  394826
## 5  521549  521675  522626  523679  523907
## 6  522488  523046  524240  525592  526032
# Keep only the places used in this study. Brookeville is intentionally not
# in the list: the analysis drops it as an outlier with very large values.
data1hc <- filter(
  data1h,
  city %in% c(
    "Ashton-Sandy Spring", "Barnesville", "Beallsville", "Bethesda", "Boyds",
    "Brinklow", "Cabin John", "Chevy Chase", "Clarksburg", "Damascus",
    "Derwood", "Dickerson", "Gaithersburg", "Garrett Park", "Germantown",
    "Glen Echo", "Kensington", "Montgomery Village", "Olney", "Poolesville",
    "Potomac", "Rockville", "Silver Spring", "Spencerville",
    "Washington Grove"
  )
)

# Upper-case the city names.
data1hc$city <- toupper(data1hc$city)

# State and CountyName are constant after the filters above; drop them by
# name so the selection does not depend on column position.
data1hc <- data1hc[!(names(data1hc) %in% c("State", "CountyName"))]
head(data1hc)
##            city 2017-01 2017-02 2017-03 2017-04 2017-05 2017-06 2017-07 2017-08
## 1 SILVER SPRING  390620  392487  395550  396275  396810  397301  398409  399917
## 2    GERMANTOWN  291883  293267  295705  296331  296689  297053  298068  299153
## 3      BETHESDA  896903  898448  903592  904371  905178  905056  905350  905851
## 4  GAITHERSBURG  368529  370345  373394  374416  375105  375409  376512  377779
## 5     ROCKVILLE  501038  503002  506782  507513  508353  508382  509133  509784
## 6       POTOMAC  951291  953254  959322  960167  961027  960982  962343  964478
##   2017-09 2017-10 2017-11 2017-12 2018-01 2018-02 2018-03 2018-04 2018-05
## 1  400649  401799  403398  404867  403991  405137  406518  409937  411555
## 2  299483  299749  300672  301461  300785  301398  302329  304773  305909
## 3  904372  906099  908490  912808  911467  915507  918452  924509  925451
## 4  378432  378835  379742  380717  379476  379885  380437  383072  383925
## 5  509574  509880  510939  511967  510069  510892  512176  516266  517852
## 6  964886  966953  969420  971791  968505  970046  970918  976201  977724
##   2018-06 2018-07 2018-08 2018-09 2018-10 2018-11 2018-12 2019-01 2019-02
## 1  413016  414837  416193  417308  417280  417085  418167  421278  422691
## 2  306714  307552  308227  308794  308906  308767  309579  311669  312509
## 3  926120  928273  929363  930919  929842  929418  929906  935502  934825
## 4  384539  385236  385666  386160  386305  386470  387711  390667  391811
## 5  519130  520562  521655  522614  522285  521718  522561  526000  526753
## 6  978552  979232  978567  978776  976553  974526  975783  982006  983097
##   2019-03 2019-04 2019-05 2019-06 2019-07 2019-08 2019-09 2019-10 2019-11
## 1  422709  421560  422156  422926  423173  423568  424269  425641  427091
## 2  312226  311086  311325  311863  312191  312607  313284  314321  315430
## 3  932861  928090  928897  930650  931695  933655  935748  938883  941983
## 4  391679  390188  390592  391444  391945  392552  393065  393931  394714
## 5  525500  522429  521891  521992  521660  521549  521675  522626  523679
## 6  980440  975437  974412  974122  973131  972740  972136  972543  972689
##   2019-12
## 1  427807
## 2  316055
## 3  944549
## 4  394826
## 5  523907
## 6  970608

Data2: Police Dispatched Incidents

# Name the place column "city".
names(data2p)[names(data2p) == "City"] <- "city"

# Discard incidents with a missing or empty city.
data2p1 <- data2p[!(is.na(data2p$city) | data2p$city == ""), ]

# Fold both "ASHTON" and "SANDY SPRING" into "ASHTON-SANDY SPRING".
# A single anchored, whole-value substitution is required here: two chained
# unanchored gsub() calls interact, because the result of replacing
# "SANDY SPRING" already contains "ASHTON" and a second pass rewrites it to
# "ASHTON-SANDY SPRING-SANDY SPRING", which no longer matches any study city.
data2p1$city <- sub(
  "^(ASHTON|SANDY SPRING)$",
  "ASHTON-SANDY SPRING",
  data2p1$city
)
head(data2p1)
##   Incident_ID Crime.Reports Crash.Reports      Start.Time        End.Time
## 1 P1700500000                               4/2/2017 6:38   4/2/2017 6:40
## 2 P1700500036                               4/2/2017 8:31   4/2/2017 8:52
## 3 P1700500127                              4/2/2017 10:49  4/2/2017 16:03
## 4 P1700500146                              4/2/2017 11:13  4/2/2017 11:34
## 5 P1900305679                   190044039 9/14/2019 13:17 9/14/2019 14:29
## 6 P1700500197                              4/2/2017 12:03  4/2/2017 12:13
##   Priority                                            Initial.Type
## 1        4 MISC-ADMIN (DOCUMENT, LOST OR FOUND PROPERTY, MESSAGES,
## 2        1                       ALARMB - ALARM BURGLARY/INTRUSION
## 3        4                                    MIS - MISC ON PATROL
## 4        2                                    DISTURBANCE/NUISANCE
## 5        0                         TRAFFIC/TRANSPORTATION INCIDENT
## 6        4                                    MIS - MISC ON PATROL
##                                                Close.Type
## 1 MISC-ADMIN (DOCUMENT, LOST OR FOUND PROPERTY, MESSAGES,
## 2                       ALARMB - ALARM BURGLARY/INTRUSION
## 3                                    MIS - MISC ON PATROL
## 4                                    DISTURBANCE/NUISANCE
## 5                         TRAFFIC/TRANSPORTATION INCIDENT
## 6                                    MIS - MISC ON PATROL
##                            Address          city State   Zip Longitude Latitude
## 1            2300 BLK  RANDOLPH RD SILVER SPRING    MD 20902 -77.04840 39.05842
## 2            8900 BLK  FAIRVIEW RD SILVER SPRING    MD 20910 -77.02930 39.00069
## 3          1  BLK W MONTGOMERY AVE     ROCKVILLE    MD 20850 -77.15290 39.08423
## 4           13000 BLK  PARKLAND DR     ROCKVILLE    MD 20853 -77.08880 39.06914
## 5 GUNNERS BRANCH RD / FREDERICK RD    GERMANTOWN    MD 20876 -77.23811 39.17546
## 6          1  BLK W MONTGOMERY AVE     ROCKVILLE    MD 20850 -77.15290 39.08423
##   Police.District.Number Beat PRA CallTime.CallRoute Calltime.Dispatch
## 1                     4D   L1 536                 45                68
## 2                     3D   G2 107                451               768
## 3                     1D   A3 255                201               218
## 4                     4D   K2 527                 98               249
## 5                     5D   M1 469                 99               118
## 6                     1D   A3 255                 57               306
##   Calltime.Arrive Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive
## 1              NA               93                 22              NA
## 2            1022             1236                316             253
## 3             267            18846                 17              48
## 4             992             1278                150             743
## 5             347             4296                 18             228
## 6              NA              556                248              NA
##   Arrive.Cleared        Disposition.Desc                 Location
## 1             NA      OTHERMISCELLANEOUS POINT (-77.0484 39.0584)
## 2            214  ALARMRESID-UNKNOWNCAUS POINT (-77.0293 39.0007)
## 3          18579      OTHERMISCELLANEOUS POINT (-77.1529 39.0842)
## 4            286 SUSPICIOUSSIT/PRSON/VEH POINT (-77.0888 39.0691)
## 5           3949 COLOTH-DAMAG-ROAD-COLLI POINT (-77.2381 39.1755)
## 6             NA       DISORDERLYCONDUCT POINT (-77.1529 39.0842)
# Keep only incidents whose city is one of the study places.
data2p2 <- filter(
  data2p1,
  city %in% c(
    "ASHTON-SANDY SPRING", "BARNESVILLE", "BEALLSVILLE", "BETHESDA", "BOYDS",
    "BRINKLOW", "BROOKEVILLE", "CABIN JOHN", "CHEVY CHASE", "CLARKSBURG",
    "DAMASCUS", "DERWOOD", "DICKERSON", "GAITHERSBURG", "GARRETT PARK",
    "GERMANTOWN", "GLEN ECHO", "KENSINGTON", "MONTGOMERY VILLAGE", "OLNEY",
    "POOLESVILLE", "POTOMAC", "ROCKVILLE", "SILVER SPRING", "SPENCERVILLE",
    "WASHINGTON GROVE"
  )
)
dim(data2p2)
## [1] 630566     26
# Drop the columns that are not used in the analysis. They are listed by name
# so the selection does not depend on the column order of the source file.
data2pcf <- data2p2[
  !(names(data2p2) %in% c(
    "Incident_ID", "Crime.Reports", "Crash.Reports", "Start.Time",
    "Initial.Type", "Address", "Longitude", "Latitude", "Beat", "PRA",
    "Disposition.Desc", "Location"
  ))
]
dim(data2pcf)
## [1] 630566     14
# Keep only incidents assigned to police districts 1D through 6D.
data2pcf <- filter(
  data2pcf,
  Police.District.Number %in% c("1D", "2D", "3D", "4D", "5D", "6D")
)
dim(data2pcf)
## [1] 630502     14
# Keep only incidents whose priority is one of "0" through "4".
data2pcf <- filter(data2pcf, Priority %in% c("0", "1", "2", "3", "4"))
head(x = data2pcf)
##          End.Time Priority
## 1   4/2/2017 6:40        4
## 2   4/2/2017 8:52        1
## 3  4/2/2017 16:03        4
## 4  4/2/2017 11:34        2
## 5 9/14/2019 14:29        0
## 6  4/2/2017 12:13        4
##                                                Close.Type          city State
## 1 MISC-ADMIN (DOCUMENT, LOST OR FOUND PROPERTY, MESSAGES, SILVER SPRING    MD
## 2                       ALARMB - ALARM BURGLARY/INTRUSION SILVER SPRING    MD
## 3                                    MIS - MISC ON PATROL     ROCKVILLE    MD
## 4                                    DISTURBANCE/NUISANCE     ROCKVILLE    MD
## 5                         TRAFFIC/TRANSPORTATION INCIDENT    GERMANTOWN    MD
## 6                                    MIS - MISC ON PATROL     ROCKVILLE    MD
##     Zip Police.District.Number CallTime.CallRoute Calltime.Dispatch
## 1 20902                     4D                 45                68
## 2 20910                     3D                451               768
## 3 20850                     1D                201               218
## 4 20853                     4D                 98               249
## 5 20876                     5D                 99               118
## 6 20850                     1D                 57               306
##   Calltime.Arrive Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive
## 1              NA               93                 22              NA
## 2            1022             1236                316             253
## 3             267            18846                 17              48
## 4             992             1278                150             743
## 5             347             4296                 18             228
## 6              NA              556                248              NA
##   Arrive.Cleared
## 1             NA
## 2            214
## 3          18579
## 4            286
## 5           3949
## 6             NA
# Parse End.Time ("month/day/year hour:minute") into a date-time value.
# NOTE(review): no tz is passed, so the session's local time zone is
# presumably used — confirm that this matches the time zone of the records.
data2pcf$datetime <- as.POSIXct(data2pcf$End.Time, format = "%m/%d/%Y %H:%M")

# Split the date-time into a time-of-day column and a calendar-date column.
data2pc <- transform(
  data2pcf,
  time = format(datetime, "%T"),
  date = format(datetime, "%m/%d/%Y")
)

# Drop Brookeville: the analysis treats it as an outlier with very large values.
data2pc <- filter(data2pc, city != "BROOKEVILLE")

# State and Zip are not used further; remove them by name so the selection
# does not depend on column position.
data2pc <- data2pc[!(names(data2pc) %in% c("State", "Zip"))]
head(data2pc)
##          End.Time Priority
## 1   4/2/2017 6:40        4
## 2   4/2/2017 8:52        1
## 3  4/2/2017 16:03        4
## 4  4/2/2017 11:34        2
## 5 9/14/2019 14:29        0
## 6  4/2/2017 12:13        4
##                                                Close.Type          city
## 1 MISC-ADMIN (DOCUMENT, LOST OR FOUND PROPERTY, MESSAGES, SILVER SPRING
## 2                       ALARMB - ALARM BURGLARY/INTRUSION SILVER SPRING
## 3                                    MIS - MISC ON PATROL     ROCKVILLE
## 4                                    DISTURBANCE/NUISANCE     ROCKVILLE
## 5                         TRAFFIC/TRANSPORTATION INCIDENT    GERMANTOWN
## 6                                    MIS - MISC ON PATROL     ROCKVILLE
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     4D                 45                68              NA
## 2                     3D                451               768            1022
## 3                     1D                201               218             267
## 4                     4D                 98               249             992
## 5                     5D                 99               118             347
## 6                     1D                 57               306              NA
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1               93                 22              NA             NA
## 2             1236                316             253            214
## 3            18846                 17              48          18579
## 4             1278                150             743            286
## 5             4296                 18             228           3949
## 6              556                248              NA             NA
##              datetime     time       date
## 1 2017-04-02 06:40:00 06:40:00 04/02/2017
## 2 2017-04-02 08:52:00 08:52:00 04/02/2017
## 3 2017-04-02 16:03:00 16:03:00 04/02/2017
## 4 2017-04-02 11:34:00 11:34:00 04/02/2017
## 5 2019-09-14 14:29:00 14:29:00 09/14/2019
## 6 2017-04-02 12:13:00 12:13:00 04/02/2017

Data3: Population of Montgomery County in 2010

names(data3pop)[names(data3pop) == "City"] <- "city"

data3popc <- filter(data3pop, 
                    city == "ASHTON-SANDY SPRING" | city == "BARNESVILLE"| city ==
                    "BEALLSVILLE"| city == "BETHESDA" | city == "BOYDS"| city == 
                    "BRINKLOW"| city == "BROOKEVILLE"| city == "CABIN JOHN"| city ==
                    "CHEVY CHASE"| city == "CLARKSBURG" | city == "DAMASCUS"| city ==
                    "DERWOOD"| city == "DICKERSON"| city == "GAITHERSBURG"| city == 
                    "GARRETT PARK" | city == "GERMANTOWN" | city == "GLEN ECHO" | city
                    == "KENSINGTON"| city == "MONTGOMERY VILLAGE"| city == "OLNEY"| city
                    == "POOLESVILLE"| city == "POTOMAC"| city == "ROCKVILLE"| city ==
                    "SILVER SPRING" | city == "SPENCERVILLE"| city == "WASHINGTON GROVE"
                    )

# Drop BROOKEVILLE (its figure is an unusually large outlier), then remove
# columns 2 to 4 so that only the city name and the census count remain
data3popc <- data3popc %>%
  filter(city != "BROOKEVILLE")
data3popc <- data3popc[, -(2:4), drop = FALSE]
head(data3popc)
##                  city Census.2010
## 1 ASHTON-SANDY SPRING        5628
## 2         BARNESVILLE         172
## 3         BEALLSVILLE         214
## 4            BETHESDA       60858
## 5               BOYDS       10460
## 6            BRINKLOW         321

Getting some tables from a dataset - Police Dispatched Incidents

Total Police Dispatched Incidents from 2017 to 2019

# Cross-tabulate incidents: one row per city, one column per priority level.
# Naming the arguments labels the two dimensions "city" and "Severity".
data2pctable <- with(data2pc, table(city = city, Severity = Priority))
data2pctable
##                      Severity
## city                      0     1     2     3     4
##   ASHTON-SANDY SPRING   104   515   294    67   150
##   BARNESVILLE             6    48    35    11    13
##   BEALLSVILLE            10    47    55    13    22
##   BETHESDA             3539 21062 11918  2595  8667
##   BOYDS                 518  1420   755   233   530
##   BRINKLOW               30   119    71    15    34
##   CABIN JOHN             54   298   114    62   153
##   CHEVY CHASE          1087  7124  3759   760  2962
##   CLARKSBURG            822  4625  1756   713  1933
##   DAMASCUS              666  2557  1127   527   892
##   DERWOOD              1232  4538  2930  1024  1698
##   DICKERSON             102   424   363   120   131
##   GAITHERSBURG         9198 37908 21066  6732 14960
##   GARRETT PARK           19   166    60    27    62
##   GERMANTOWN           6747 24962 13491  4646  8741
##   GLEN ECHO              21    69    44     4    40
##   KENSINGTON            930  5046  2258   743  2010
##   MONTGOMERY VILLAGE   2551  9397  3602  1934  3644
##   OLNEY                 950  5048  2200   888  1915
##   POOLESVILLE           225  1087   435   206   360
##   POTOMAC              1424 11467  4100  1224  3347
##   ROCKVILLE            8442 43076 21999  7710 20005
##   SILVER SPRING       22187 83731 56328 14187 35425
##   SPENCERVILLE           53   159   101    31    54
##   WASHINGTON GROVE       15    92    29    22    37
# Reshape the contingency table to long form: one row per
# (city, Severity) pair with its count in column "Freq"
data2pcdf <- as.data.frame(data2pctable, responseName = "Freq")
head(data2pcdf)
##                  city Severity Freq
## 1 ASHTON-SANDY SPRING        0  104
## 2         BARNESVILLE        0    6
## 3         BEALLSVILLE        0   10
## 4            BETHESDA        0 3539
## 5               BOYDS        0  518
## 6            BRINKLOW        0   30
# Wide version of the city-by-priority counts: one row per city, one column
# per priority level (data.frame() prefixes the numeric names with "X"),
# plus the city name copied out of the row names into a regular column
data2pcdf2 <- table(data2pc$city, data2pc$Priority) %>%
  unclass() %>%
  data.frame()
data2pcdf2[["city"]] <- rownames(data2pcdf2)
head(data2pcdf2)
##                       X0    X1    X2   X3   X4                city
## ASHTON-SANDY SPRING  104   515   294   67  150 ASHTON-SANDY SPRING
## BARNESVILLE            6    48    35   11   13         BARNESVILLE
## BEALLSVILLE           10    47    55   13   22         BEALLSVILLE
## BETHESDA            3539 21062 11918 2595 8667            BETHESDA
## BOYDS                518  1420   755  233  530               BOYDS
## BRINKLOW              30   119    71   15   34            BRINKLOW
# Total incidents per city, summed across all priority levels
data2pcRS <- rowSums(data2pctable)
head(data2pcRS)
## ASHTON-SANDY SPRING         BARNESVILLE         BEALLSVILLE            BETHESDA 
##                1130                 113                 147               47781 
##               BOYDS            BRINKLOW 
##                3456                 269
# Distribution (min, quartiles, mean, max) of the per-city totals
summary(data2pcRS)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     113     398    5769   25083   21128  211858
# Mean incident count per city, averaged over the priority levels
rowMeans(data2pctable)
## ASHTON-SANDY SPRING         BARNESVILLE         BEALLSVILLE            BETHESDA 
##               226.0                22.6                29.4              9556.2 
##               BOYDS            BRINKLOW          CABIN JOHN         CHEVY CHASE 
##               691.2                53.8               136.2              3138.4 
##          CLARKSBURG            DAMASCUS             DERWOOD           DICKERSON 
##              1969.8              1153.8              2284.4               228.0 
##        GAITHERSBURG        GARRETT PARK          GERMANTOWN           GLEN ECHO 
##             17972.8                66.8             11717.4                35.6 
##          KENSINGTON  MONTGOMERY VILLAGE               OLNEY         POOLESVILLE 
##              2197.4              4225.6              2200.2               462.6 
##             POTOMAC           ROCKVILLE       SILVER SPRING        SPENCERVILLE 
##              4312.4             20246.4             42371.6                79.6 
##    WASHINGTON GROVE 
##                39.0
# Total incidents per priority level, summed across all cities
data2pcCS <- colSums(data2pctable)
head(data2pcCS)
##      0      1      2      3      4 
##  60932 264985 148890  44494 107785
# Distribution (min, quartiles, mean, max) of the per-priority totals
summary(data2pcCS)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   44494   60932  107785  125417  148890  264985
# Mean incident count per priority level, averaged over the cities
colMeans(data2pctable)
##        0        1        2        3        4 
##  2437.28 10599.40  5955.60  1779.76  4311.40
#Severity / Year / Hour
# Keep the incidents whose timestamp falls in 2017, 2018 or 2019.
# The test must be element-wise: the scalar `||` operator looks only at the
# first row (and is an error for length > 1 input in R >= 4.3), so it cannot
# be used as a row filter. `%in%` yields one TRUE/FALSE per row.
data2pc171819 <- filter(data2pc, year(datetime) %in% c(2017, 2018, 2019))
head(data2pc171819)
##          End.Time Priority
## 1   4/2/2017 6:40        4
## 2   4/2/2017 8:52        1
## 3  4/2/2017 16:03        4
## 4  4/2/2017 11:34        2
## 5 9/14/2019 14:29        0
## 6  4/2/2017 12:13        4
##                                                Close.Type          city
## 1 MISC-ADMIN (DOCUMENT, LOST OR FOUND PROPERTY, MESSAGES, SILVER SPRING
## 2                       ALARMB - ALARM BURGLARY/INTRUSION SILVER SPRING
## 3                                    MIS - MISC ON PATROL     ROCKVILLE
## 4                                    DISTURBANCE/NUISANCE     ROCKVILLE
## 5                         TRAFFIC/TRANSPORTATION INCIDENT    GERMANTOWN
## 6                                    MIS - MISC ON PATROL     ROCKVILLE
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     4D                 45                68              NA
## 2                     3D                451               768            1022
## 3                     1D                201               218             267
## 4                     4D                 98               249             992
## 5                     5D                 99               118             347
## 6                     1D                 57               306              NA
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1               93                 22              NA             NA
## 2             1236                316             253            214
## 3            18846                 17              48          18579
## 4             1278                150             743            286
## 5             4296                 18             228           3949
## 6              556                248              NA             NA
##              datetime     time       date
## 1 2017-04-02 06:40:00 06:40:00 04/02/2017
## 2 2017-04-02 08:52:00 08:52:00 04/02/2017
## 3 2017-04-02 16:03:00 16:03:00 04/02/2017
## 4 2017-04-02 11:34:00 11:34:00 04/02/2017
## 5 2019-09-14 14:29:00 14:29:00 09/14/2019
## 6 2017-04-02 12:13:00 12:13:00 04/02/2017
# Reduce the data to the three analysis columns, selected by name:
# priority as a number, calendar year and hour of day of the timestamp
data2pc171819 <- data.frame(
  Priority = as.numeric(data2pc171819$Priority),
  year = year(data2pc171819$datetime),
  hour = hour(data2pc171819$datetime)
)
head(data2pc171819)
##   Priority year hour
## 1        4 2017    6
## 2        1 2017    8
## 3        4 2017   16
## 4        2 2017   11
## 5        0 2019   14
## 6        4 2017   12
# Three-way contingency table of counts: priority x hour x year
tbpc171819 <- table(
  data2pc171819[["Priority"]],
  data2pc171819[["hour"]],
  data2pc171819[["year"]]
)
head(tbpc171819)
## [1]  952 2749 1837  292  500  676
# Long form of the three-way table: one row per cell, count in "Freq"
tbpc171819df <- as.data.frame(tbpc171819, responseName = "Freq")
head(tbpc171819df)
##   Var1 Var2 Var3 Freq
## 1    0    0 2017  952
## 2    1    0 2017 2749
## 3    2    0 2017 1837
## 4    3    0 2017  292
## 5    4    0 2017  500
## 6    0    1 2017  676
# Replace the default Var1/Var2/Var3 column names with meaningful ones
names(tbpc171819df)[match(c("Var1", "Var2", "Var3"), names(tbpc171819df))] <-
  c("severity", "hour", "year")
head(tbpc171819df)
##   severity hour year Freq
## 1        0    0 2017  952
## 2        1    0 2017 2749
## 3        2    0 2017 1837
## 4        3    0 2017  292
## 5        4    0 2017  500
## 6        0    1 2017  676
#barplot(tbpc171819)

- Police Dispatched Incidents 2017

Split the data by year and time of day
#2017 whole year
# Incidents whose timestamp falls in calendar year 2017
data2pc17 <- data2pc %>%
  filter(year(datetime) == 2017)
head(data2pc17)
##         End.Time Priority
## 1  4/2/2017 6:40        4
## 2  4/2/2017 8:52        1
## 3 4/2/2017 16:03        4
## 4 4/2/2017 11:34        2
## 5 4/2/2017 12:13        4
## 6 4/2/2017 14:51        1
##                                                Close.Type          city
## 1 MISC-ADMIN (DOCUMENT, LOST OR FOUND PROPERTY, MESSAGES, SILVER SPRING
## 2                       ALARMB - ALARM BURGLARY/INTRUSION SILVER SPRING
## 3                                    MIS - MISC ON PATROL     ROCKVILLE
## 4                                    DISTURBANCE/NUISANCE     ROCKVILLE
## 5                                    MIS - MISC ON PATROL     ROCKVILLE
## 6                                         DECEASED PERSON    KENSINGTON
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     4D                 45                68              NA
## 2                     3D                451               768            1022
## 3                     1D                201               218             267
## 4                     4D                 98               249             992
## 5                     1D                 57               306              NA
## 6                     2D                195              2882            3567
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1               93                 22              NA             NA
## 2             1236                316             253            214
## 3            18846                 17              48          18579
## 4             1278                150             743            286
## 5              556                248              NA             NA
## 6             6426               2686             685           2859
##              datetime     time       date
## 1 2017-04-02 06:40:00 06:40:00 04/02/2017
## 2 2017-04-02 08:52:00 08:52:00 04/02/2017
## 3 2017-04-02 16:03:00 16:03:00 04/02/2017
## 4 2017-04-02 11:34:00 11:34:00 04/02/2017
## 5 2017-04-02 12:13:00 12:13:00 04/02/2017
## 6 2017-04-02 14:51:00 14:51:00 04/02/2017
# 2017 counts by priority (rows) and city (columns), then the total
# number of incidents at each priority level
pc17table <- table(data2pc17[["Priority"]], data2pc17[["city"]])
rowSums(pc17table)
##     0     1     2     3     4 
## 17620 69158 44132 10443 28196
# Heat-map input for 2017: numeric priority and hour of day for each incident
hmdfpc17 <- data.frame(
  Priority = as.numeric(data2pc17$Priority),
  hour = hour(data2pc17$datetime)
)
head(hmdfpc17)
##   Priority hour
## 1        4    6
## 2        1    8
## 3        4   16
## 4        2   11
## 5        4   12
## 6        1   14
# 2017 counts by priority (rows) and hour of day (columns)
tbpc17 <- table(hmdfpc17[["Priority"]], hmdfpc17[["hour"]])
head(tbpc17)
##    
##        0    1    2    3    4    5    6    7    8    9   10   11   12   13   14
##   0  952  676  554  473  369  279  309  359  451  529  622  611  683  743  765
##   1 2749 2099 1704 1353 1176 1105 1091 1721 2351 2818 3112 3217 3465 3463 3407
##   2 1837 1423  954  710  588  472  531  976 1459 1680 1708 1808 2012 2243 2155
##   3  292  228  172  113   88   74   71  140  268  421  509  572  562  619  570
##   4  500  337  184  163  133  120  203  622 1092 1477 1780 1889 1963 1819 1684
##    
##       15   16   17   18   19   20   21   22   23
##   0  992 1007  964 1049 1068 1048 1072 1089  956
##   1 3510 4132 3888 3850 4065 3853 4002 3807 3220
##   2 2449 2936 3000 2910 2670 2290 2342 2727 2252
##   3  584  734  708  720  724  653  636  592  393
##   4 1815 2348 2035 1685 1658 1463 1265 1212  749
# Stacked bar chart of 2017 incidents per hour (one segment per priority)
barplot(tbpc17)
# Incidents per hour and a kernel density estimate of the hour values.
# NOTE(review): the variable `density` shares its name with the function
# stats::density(); calls still resolve to the function, but a distinct
# name would be clearer - confirm whether later code reads this variable.
hourtable <- table(hmdfpc17[["hour"]])
density <- density(hmdfpc17[["hour"]])

# Long form of the priority-by-hour table with descriptive column names
tbpc17df <- setNames(
  as.data.frame(tbpc17),
  c("severity", "hour", "Freq")
)
head(tbpc17df)
##   severity hour Freq
## 1        0    0  952
## 2        1    0 2749
## 3        2    0 1837
## 4        3    0  292
## 5        4    0  500
## 6        0    1  676
# Heat map of the 2017 counts: hour of day on x, severity on y, tiles shaded
# from white to orange by frequency with the count printed in each tile
htpc17 <- ggplot(data = tbpc17df, mapping = aes(x = hour, y = severity, fill = Freq)) +
  geom_tile() +
  geom_text(mapping = aes(label = Freq), size = 3) +
  scale_fill_gradient(low = "white", high = "orange") +
  ggtitle("2017 Hourly Crime per Severity (0 = most dangerous)")
htpc17

#split each day into four six-hour periods
#2017 Day1: 00:00 to 05:59
data2pc17td1 <- data2pc17 %>%
  filter(hour(datetime) %in% 0:5)
head(data2pc17td1)
##        End.Time Priority                        Close.Type          city
## 1 4/3/2017 1:15        3    MISSING, RUNAWAY, FOUND PERSON     ROCKVILLE
## 2 4/3/2017 3:03        1                   MENTAL DISORDER  GAITHERSBURG
## 3 4/3/2017 4:18        0 SUICIDAL PERSON/ATTEMPTED SUICIDE SILVER SPRING
## 4 4/3/2017 5:53        1                   RESCUE WITH FRS  GAITHERSBURG
## 5 4/4/2017 0:59        1                   RESCUE WITH FRS     ROCKVILLE
## 6 4/4/2017 1:21        2   NON-PRIORITY RESPONSE TRANSPORT       POTOMAC
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     1D                401               656              NA
## 2                     6D                322               447             604
## 3                     3D                131               191            2227
## 4                     6D                264               376             976
## 5                     1D                228               670             951
## 6                     1D                260               410              NA
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1             1431                255              NA             NA
## 2             2059                124             157           1455
## 3             4588                 60            2035           2361
## 4             1803                112             600            826
## 5             1175                441             280            224
## 6              758                149              NA             NA
##              datetime     time       date
## 1 2017-04-03 01:15:00 01:15:00 04/03/2017
## 2 2017-04-03 03:03:00 03:03:00 04/03/2017
## 3 2017-04-03 04:18:00 04:18:00 04/03/2017
## 4 2017-04-03 05:53:00 05:53:00 04/03/2017
## 5 2017-04-04 00:59:00 00:59:00 04/04/2017
## 6 2017-04-04 01:21:00 01:21:00 04/04/2017
# Day1 period: numeric priority and hour of day for each incident
day1pc17 <- data.frame(
  Priority = as.numeric(data2pc17td1$Priority),
  hour = hour(data2pc17td1$datetime)
)
head(day1pc17)
##   Priority hour
## 1        3    1
## 2        1    3
## 3        0    4
## 4        1    5
## 5        1    0
## 6        2    1
# Day1 counts by priority (rows) and hour (columns)
d1pc17table <- table(day1pc17[["Priority"]], day1pc17[["hour"]])
head(d1pc17table)
##    
##        0    1    2    3    4    5
##   0  952  676  554  473  369  279
##   1 2749 2099 1704 1353 1176 1105
##   2 1837 1423  954  710  588  472
##   3  292  228  172  113   88   74
##   4  500  337  184  163  133  120
# Stacked bar chart of Day1 incidents per hour, followed by the per-hour
# counts and a kernel density estimate of the hour values
barplot(d1pc17table)
d1pc17hrtable <- table(day1pc17[["hour"]])
d1pc17density <- density(day1pc17[["hour"]])

# Long form of the Day1 table with descriptive column names
d1pc17tbdf <- setNames(
  as.data.frame(d1pc17table),
  c("severity", "hour", "Freq")
)
head(d1pc17tbdf)
##   severity hour Freq
## 1        0    0  952
## 2        1    0 2749
## 3        2    0 1837
## 4        3    0  292
## 5        4    0  500
## 6        0    1  676
#2017 Day2: 06:00 to 11:59
data2pc17td2 <- data2pc17 %>%
  filter(hour(datetime) %in% 6:11)
head(data2pc17td2)
##         End.Time Priority
## 1  4/2/2017 6:40        4
## 2  4/2/2017 8:52        1
## 3 4/2/2017 11:34        2
## 4  4/3/2017 9:17        1
## 5 4/3/2017 10:03        1
## 6 4/3/2017 10:50        2
##                                                Close.Type          city
## 1 MISC-ADMIN (DOCUMENT, LOST OR FOUND PROPERTY, MESSAGES, SILVER SPRING
## 2                       ALARMB - ALARM BURGLARY/INTRUSION SILVER SPRING
## 3                                    DISTURBANCE/NUISANCE     ROCKVILLE
## 4                                         DECEASED PERSON    GERMANTOWN
## 5                                     BOX ALARM - VIA FRS  GAITHERSBURG
## 6                                         FRAUD/DECEPTION     ROCKVILLE
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     4D                 45                68              NA
## 2                     3D                451               768            1022
## 3                     4D                 98               249             992
## 4                     5D                 62               291             599
## 5                     6D                 82               456             757
## 6                     1D                 79               542             725
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1               93                 22              NA             NA
## 2             1236                316             253            214
## 3             1278                150             743            286
## 4            12877                229             307          12278
## 5             1532                374             300            775
## 6             3799                462             183           3073
##              datetime     time       date
## 1 2017-04-02 06:40:00 06:40:00 04/02/2017
## 2 2017-04-02 08:52:00 08:52:00 04/02/2017
## 3 2017-04-02 11:34:00 11:34:00 04/02/2017
## 4 2017-04-03 09:17:00 09:17:00 04/03/2017
## 5 2017-04-03 10:03:00 10:03:00 04/03/2017
## 6 2017-04-03 10:50:00 10:50:00 04/03/2017
# Day2 period: numeric priority and hour of day for each incident
day2pc17 <- data.frame(
  Priority = as.numeric(data2pc17td2$Priority),
  hour = hour(data2pc17td2$datetime)
)
head(day2pc17)
##   Priority hour
## 1        4    6
## 2        1    8
## 3        2   11
## 4        1    9
## 5        1   10
## 6        2   10
# Day2 counts by priority (rows) and hour (columns)
d2pc17table <- table(day2pc17[["Priority"]], day2pc17[["hour"]])
head(d2pc17table)
##    
##        6    7    8    9   10   11
##   0  309  359  451  529  622  611
##   1 1091 1721 2351 2818 3112 3217
##   2  531  976 1459 1680 1708 1808
##   3   71  140  268  421  509  572
##   4  203  622 1092 1477 1780 1889
# Stacked bar chart of Day2 incidents per hour, followed by the per-hour
# counts and a kernel density estimate of the hour values
barplot(d2pc17table)
d2pc17hrtable <- table(day2pc17[["hour"]])
d2pc17density <- density(day2pc17[["hour"]])

# Long form of the Day2 table with descriptive column names
d2pc17tbdf <- setNames(
  as.data.frame(d2pc17table),
  c("severity", "hour", "Freq")
)
head(d2pc17tbdf)
##   severity hour Freq
## 1        0    6  309
## 2        1    6 1091
## 3        2    6  531
## 4        3    6   71
## 5        4    6  203
## 6        0    7  359
#2017 Day3: 12:00 to 17:59
data2pc17tn1 <- data2pc17 %>%
  filter(hour(datetime) %in% 12:17)
head(data2pc17tn1)
##         End.Time Priority                        Close.Type               city
## 1 4/2/2017 16:03        4              MIS - MISC ON PATROL          ROCKVILLE
## 2 4/2/2017 12:13        4              MIS - MISC ON PATROL          ROCKVILLE
## 3 4/2/2017 14:51        1                   DECEASED PERSON         KENSINGTON
## 4 4/2/2017 14:36        1              DISTURBANCE/NUISANCE MONTGOMERY VILLAGE
## 5 4/2/2017 16:05        2                  STATION RESPONSE      SILVER SPRING
## 6 4/2/2017 17:15        0 SUICIDAL PERSON/ATTEMPTED SUICIDE      SILVER SPRING
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     1D                201               218             267
## 2                     1D                 57               306              NA
## 3                     2D                195              2882            3567
## 4                     6D                141               451             878
## 5                     4D                101               500             520
## 6                     2D                 99               156             588
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1            18846                 17              48          18579
## 2              556                248              NA             NA
## 3             6426               2686             685           2859
## 4             1393                309             427            515
## 5             2750                399              19           2229
## 6             3908                 56             431           3320
##              datetime     time       date
## 1 2017-04-02 16:03:00 16:03:00 04/02/2017
## 2 2017-04-02 12:13:00 12:13:00 04/02/2017
## 3 2017-04-02 14:51:00 14:51:00 04/02/2017
## 4 2017-04-02 14:36:00 14:36:00 04/02/2017
## 5 2017-04-02 16:05:00 16:05:00 04/02/2017
## 6 2017-04-02 17:15:00 17:15:00 04/02/2017
# Day3 period: numeric priority and hour of day for each incident
day3pc17 <- data.frame(
  Priority = as.numeric(data2pc17tn1$Priority),
  hour = hour(data2pc17tn1$datetime)
)
head(day3pc17)
##   Priority hour
## 1        4   16
## 2        4   12
## 3        1   14
## 4        1   14
## 5        2   16
## 6        0   17
# Day3 counts by priority (rows) and hour (columns)
d3pc17table <- table(day3pc17[["Priority"]], day3pc17[["hour"]])
head(d3pc17table)
##    
##       12   13   14   15   16   17
##   0  683  743  765  992 1007  964
##   1 3465 3463 3407 3510 4132 3888
##   2 2012 2243 2155 2449 2936 3000
##   3  562  619  570  584  734  708
##   4 1963 1819 1684 1815 2348 2035
# Stacked bar chart of Day3 incidents per hour, followed by the per-hour
# counts and a kernel density estimate of the hour values
barplot(d3pc17table)
d3pc17hrtable <- table(day3pc17[["hour"]])
d3pc17density <- density(day3pc17[["hour"]])

# Long form of the Day3 table with descriptive column names
d3pc17tbdf <- setNames(
  as.data.frame(d3pc17table),
  c("severity", "hour", "Freq")
)
head(d3pc17tbdf)
##   severity hour Freq
## 1        0   12  683
## 2        1   12 3465
## 3        2   12 2012
## 4        3   12  562
## 5        4   12 1963
## 6        0   13  743
#2017 Day4: 18:00 to 23:59
data2pc17tn2 <- data2pc17 %>%
  filter(hour(datetime) %in% 18:23)
head(data2pc17tn2)
##         End.Time Priority                      Close.Type          city
## 1 4/2/2017 18:59        2 NON-PRIORITY RESPONSE TRANSPORT       POTOMAC
## 2 4/2/2017 18:13        1                 RESCUE WITH FRS SILVER SPRING
## 3 4/2/2017 23:57        2             ASSIST OTHER AGENCY  GAITHERSBURG
## 4 4/2/2017 20:35        2                   THEFT/LARCENY       POTOMAC
## 5 4/2/2017 22:40        2 NON-PRIORITY RESPONSE TRANSPORT       POTOMAC
## 6 4/2/2017 22:16        2               TRAFFIC VIOLATION SILVER SPRING
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     1D                  0              2085            4612
## 2                     3D                139               217              NA
## 3                     6D                  0             17533              NA
## 4                     1D                323              3300            4237
## 5                     1D                197              2997            3847
## 6                     3D                 47               394              NA
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1             9423               2085            2527           4811
## 2             1103                 77              NA             NA
## 3            17630              17533              NA             NA
## 4             5464               2976             936           1226
## 5             6822               2799             849           2974
## 6             1967                347              NA             NA
##              datetime     time       date
## 1 2017-04-02 18:59:00 18:59:00 04/02/2017
## 2 2017-04-02 18:13:00 18:13:00 04/02/2017
## 3 2017-04-02 23:57:00 23:57:00 04/02/2017
## 4 2017-04-02 20:35:00 20:35:00 04/02/2017
## 5 2017-04-02 22:40:00 22:40:00 04/02/2017
## 6 2017-04-02 22:16:00 22:16:00 04/02/2017
# Day4 period: numeric priority and hour of day for each incident
day4pc17 <- data.frame(
  Priority = as.numeric(data2pc17tn2$Priority),
  hour = hour(data2pc17tn2$datetime)
)
head(day4pc17)
##   Priority hour
## 1        2   18
## 2        1   18
## 3        2   23
## 4        2   20
## 5        2   22
## 6        2   22
# Day4 counts by priority (rows) and hour (columns)
d4pc17table <- table(day4pc17[["Priority"]], day4pc17[["hour"]])
head(d4pc17table)
##    
##       18   19   20   21   22   23
##   0 1049 1068 1048 1072 1089  956
##   1 3850 4065 3853 4002 3807 3220
##   2 2910 2670 2290 2342 2727 2252
##   3  720  724  653  636  592  393
##   4 1685 1658 1463 1265 1212  749
# Stacked bar chart of Day4 incidents per hour, followed by the per-hour
# counts and a kernel density estimate of the hour values
barplot(d4pc17table)
d4pc17hrtable <- table(day4pc17[["hour"]])
d4pc17density <- density(day4pc17[["hour"]])

# Long form of the Day4 table with descriptive column names
d4pc17tbdf <- setNames(
  as.data.frame(d4pc17table),
  c("severity", "hour", "Freq")
)
head(d4pc17tbdf)
##   severity hour Freq
## 1        0   18 1049
## 2        1   18 3850
## 3        2   18 2910
## 4        3   18  720
## 5        4   18 1685
## 6        0   19 1068
# Interactive violin plot of the Day4 hourly counts per severity level, with
# the median of each group marked in red.
# - `fun` replaces the deprecated `fun.y` argument of stat_summary().
# - `mult` is dropped: it is not a recognised parameter and was ignored.
# - FALSE is spelled out because `F` is an ordinary, reassignable variable.
vd4pc17tbdf <- {
  ggplot(d4pc17tbdf, aes(x = severity, y = Freq, fill = severity)) +
    geom_violin(trim = FALSE) +
    stat_summary(fun = median, geom = "pointrange", color = "red")
} %>%
  ggplotly() %>%
  config(displayModeBar = FALSE)
## Warning: `fun.y` is deprecated. Use `fun` instead.
## Warning: Ignoring unknown parameters: mult
vd4pc17tbdf

Combine Day1, Day2, Day3, and Day4 of the 2017 data

# Stack the four six-hour period tables into one long data frame
day1234pc17 <- rbind(d1pc17tbdf, d2pc17tbdf, d3pc17tbdf, d4pc17tbdf)
head(day1234pc17)
##   severity hour Freq
## 1        0    0  952
## 2        1    0 2749
## 3        2    0 1837
## 4        3    0  292
## 5        4    0  500
## 6        0    1  676
# Line-and-point chart of 2017 frequency by hour, one facet per severity.
# The `text` aesthetic is not used by ggplot2 itself (hence its "unknown
# aesthetics" warning); it supplies the tooltip shown by ggplotly().
# NOTE(review): `labels` passed to scale_x_discrete() is not defined in this
# part of the file - confirm it is created earlier.
day1234pc17gg <- ggplot(
  data = day1234pc17,
  mapping = aes(
    x = hour, y = Freq,
    group = severity, shape = severity, color = severity
  )
) +
  geom_line() +
  geom_point(
    mapping = aes(
      text = paste(
        "Hour: ", hour, "<br>",
        "Frequency: ", Freq, "<br>",
        "Severity: ", severity
      )
    ),
    size = 2
  ) +
  facet_grid(. ~ severity) +
  scale_x_discrete("Hour", breaks = labels, labels = labels) +
  scale_y_continuous("Frequency") +
  ggtitle("2017 Hourly Frequency of Severity") +
  theme(axis.text.x = element_text(size = 5))
## Warning: Ignoring unknown aesthetics: text
day1234pc17gg

# Interactive version of the faceted chart; the tooltip shows only the
# custom `text` aesthetic and the plotly mode bar is hidden.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
day1234pc17gp <- ggplotly(day1234pc17gg, tooltip = "text") %>%
  config(displayModeBar = FALSE)
day1234pc17gp
# Same chart as the faceted version but with all severities on one panel,
# slightly larger points and larger x-axis text.
# The `text` aesthetic is not used by ggplot2 itself (hence its "unknown
# aesthetics" warning); it supplies the tooltip shown by ggplotly().
# NOTE(review): `labels` passed to scale_x_discrete() is not defined in this
# part of the file - confirm it is created earlier.
day1234pc17gg2 <- ggplot(
  data = day1234pc17,
  mapping = aes(
    x = hour, y = Freq,
    group = severity, shape = severity, color = severity
  )
) +
  geom_line() +
  geom_point(
    mapping = aes(
      text = paste(
        "Hour: ", hour, "<br>",
        "Frequency: ", Freq, "<br>",
        "Severity: ", severity
      )
    ),
    size = 2.5
  ) +
  scale_x_discrete("Hour", breaks = labels, labels = labels) +
  scale_y_continuous("Frequency") +
  ggtitle("2017 Hourly Frequency of Severity") +
  theme(axis.text.x = element_text(size = 10))
## Warning: Ignoring unknown aesthetics: text
day1234pc17gg2

# Interactive version of the single-panel chart; the tooltip shows only the
# custom `text` aesthetic and the plotly mode bar is hidden.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
day1234pc17gp2 <- ggplotly(day1234pc17gg2, tooltip = "text") %>%
  config(displayModeBar = FALSE)
day1234pc17gp2
#City 2017
# 2017 counts with one row per city and one column per priority level;
# naming the arguments labels the dimensions "city" and "Priority"
data2pc17tb <- with(data2pc17, table(city = city, Priority = Priority))
head(data2pc17tb)
##                      Priority
## city                     0    1    2    3    4
##   ASHTON-SANDY SPRING   26  121   69   28   51
##   BARNESVILLE            2   12    6    3    5
##   BEALLSVILLE            1   13   10    4   11
##   BETHESDA             945 5727 3422  640 2248
##   BOYDS                144  343  213   65  139
##   BRINKLOW               8   19   17    7   10
#merge with Population
# Join the long-form 2017 counts to the 2010 census figures on "city"
# (keeping unmatched rows from both sides), then express each count as
# incidents per 100 residents, rounded to one decimal place
data2317tb <- merge(
  as.data.frame(data2pc17tb),
  data3popc,
  by = "city",
  all = TRUE
)
data2317tb$rate <- with(
  data2317tb,
  round((Freq / Census.2010) * 100, digits = 1)
)
head(data2317tb)
##                  city Priority Freq Census.2010 rate
## 1 ASHTON-SANDY SPRING        0   26        5628  0.5
## 2 ASHTON-SANDY SPRING        3   28        5628  0.5
## 3 ASHTON-SANDY SPRING        2   69        5628  1.2
## 4 ASHTON-SANDY SPRING        4   51        5628  0.9
## 5 ASHTON-SANDY SPRING        1  121        5628  2.1
## 6         BARNESVILLE        0    2         172  1.2
# Total 2017 incidents per city, summed across all priority levels
data2pc17sum <- rowSums(data2pc17tb)
head(data2pc17sum)
## ASHTON-SANDY SPRING         BARNESVILLE         BEALLSVILLE            BETHESDA 
##                 295                  28                  39               12982 
##               BOYDS            BRINKLOW 
##                 904                  61
# Distribution (min, quartiles, mean, max) of the 2017 per-city totals
summary(data2pc17sum)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      28     121    1531    6782    5752   56826
# Per-city 2017 totals as a data frame with the city names kept both as row
# names and as a regular character column.
# Built directly from the named vector: assigning names(dimnames(...)) on a
# data frame has no effect (only the row and column names themselves are
# stored), so that step and the temporary column it worked around are gone.
data2pc17sumdf <- data.frame(
  Total = as.vector(data2pc17sum),
  city = names(data2pc17sum),
  row.names = names(data2pc17sum),
  stringsAsFactors = FALSE
)
head(data2pc17sumdf)
##                     Total                city
## ASHTON-SANDY SPRING   295 ASHTON-SANDY SPRING
## BARNESVILLE            28         BARNESVILLE
## BEALLSVILLE            39         BEALLSVILLE
## BETHESDA            12982            BETHESDA
## BOYDS                 904               BOYDS
## BRINKLOW               61            BRINKLOW
# 2017 counts with priority levels as rows and cities as columns; the
# dimension labels are supplied at construction time through dnn.
data2pc17tb2 <- table(
  data2pc17$Priority,
  data2pc17$city,
  dnn = c("Priority", "city")
)
head(data2pc17tb2)
##         city
## Priority ASHTON-SANDY SPRING BARNESVILLE BEALLSVILLE BETHESDA BOYDS BRINKLOW
##        0                  26           2           1      945   144        8
##        1                 121          12          13     5727   343       19
##        2                  69           6          10     3422   213       17
##        3                  28           3           4      640    65        7
##        4                  51           5          11     2248   139       10
##         city
## Priority CABIN JOHN CHEVY CHASE CLARKSBURG DAMASCUS DERWOOD DICKERSON
##        0         20         311        224      205     319        29
##        1         79        1923       1206      631    1060       104
##        2         29        1060        480      329     769        97
##        3          8         176        172      171     367        30
##        4         33         696        475      195     463        33
##         city
## Priority GAITHERSBURG GARRETT PARK GERMANTOWN GLEN ECHO KENSINGTON
##        0         2682            5       2060         9        249
##        1        10046           46       6660        16       1395
##        2         6265           21       4442         9        644
##        3         1551            4       1056         0        200
##        4         3978           15       2421        10        540
##         city
## Priority MONTGOMERY VILLAGE OLNEY POOLESVILLE POTOMAC ROCKVILLE SILVER SPRING
##        0                755   267          66     465      2451          6365
##        1               2511  1361         315    2973     10811         21718
##        2               1246   686         130    1120      6545         16481
##        3                394   246          68     363      1687          3183
##        4               1053   537          81     831      5263          9079
##         city
## Priority SPENCERVILLE WASHINGTON GROVE
##        0            8                4
##        1           48               20
##        2           32               10
##        3           14                6
##        4           19               10
# Total 2017 incidents per priority level: sum each priority row across cities.
data2pc17sum2 <- rowSums(data2pc17tb2)
head(data2pc17sum2)
##     0     1     2     3     4 
## 17620 69158 44132 10443 28196
summary(data2pc17sum2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10443   17620   28196   33910   44132   69158
# Per-severity totals as a data frame (columns Total and Severity, row names =
# priority level). Built directly: the previous version read the totals via
# `$data2pc17sum`, which only worked because `$` partially matched the real
# column name `data2pc17sum2`, and its names(dimnames(df)) <- ... step had no
# effect on a data frame.
data2pc17sumdf2 <- data.frame(
  Total = unname(data2pc17sum2),
  Severity = names(data2pc17sum2),
  row.names = names(data2pc17sum2),
  stringsAsFactors = FALSE
)
head(data2pc17sumdf2)
##   Total Severity
## 0 17620        0
## 1 69158        1
## 2 44132        2
## 3 10443        3
## 4 28196        4
# Convert the city-by-priority table to a long data frame and keep only the
# Priority and Freq columns (the city column is dropped).
data2pc17df <- as.data.frame(data2pc17tb)[c("Priority", "Freq")]
head(data2pc17df)
##   Priority Freq
## 1        0   26
## 2        0    2
## 3        0    1
## 4        0  945
## 5        0  144
## 6        0    8
# Boxplot of the per-city 2017 incident counts for each severity (priority)
# level: five-number summary, with outliers drawn as open black circles.
pcbox17 <- ggplot(
  data2pc17df,
  aes(x = Priority, y = Freq, fill = Priority)
) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 2) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.9)) +
  ggtitle("2017 Crime Count of Severity") +
  xlab("Severity") +
  ylab("Count") +
  theme(legend.position = "right") +
  labs(fill = "Severity")
pcbox17

# Interactive version with the plotly mode bar hidden. FALSE is spelled out
# because F is an ordinary variable that can be reassigned.
pcbox17gg <- ggplotly(pcbox17) %>% config(displayModeBar = FALSE)
pcbox17gg
# Same five-number/outlier view as the count boxplot, but using the
# population-adjusted rate (incidents per 100 residents) instead of raw counts.
head(data2317tb, 10)
##                   city Priority Freq Census.2010 rate
## 1  ASHTON-SANDY SPRING        0   26        5628  0.5
## 2  ASHTON-SANDY SPRING        3   28        5628  0.5
## 3  ASHTON-SANDY SPRING        2   69        5628  1.2
## 4  ASHTON-SANDY SPRING        4   51        5628  0.9
## 5  ASHTON-SANDY SPRING        1  121        5628  2.1
## 6          BARNESVILLE        0    2         172  1.2
## 7          BARNESVILLE        2    6         172  3.5
## 8          BARNESVILLE        4    5         172  2.9
## 9          BARNESVILLE        1   12         172  7.0
## 10         BARNESVILLE        3    3         172  1.7
data23box17 <- ggplot(
  data2317tb,
  aes(x = Priority, y = rate, fill = Priority)
) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 2) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.9)) +
  ggtitle("2017 Crime Rate by Severity") +
  xlab("Severity") +
  ylab("Rate") +
  theme(legend.position = "right") +
  labs(fill = "Severity")
data23box17

# Interactive version with the plotly mode bar hidden. FALSE is spelled out
# because F is an ordinary variable that can be reassigned.
data23box17gg <- ggplotly(data23box17) %>% config(displayModeBar = FALSE)
data23box17gg

- Police Dispatched Incidents 2018

Split the year and time

# All incidents whose datetime falls in calendar year 2018.
data2pc18 <- data2pc %>%
  filter(year(datetime) == 2018)
head(data2pc18)
##           End.Time Priority
## 1  4/27/2018 18:23        1
## 2    5/2/2018 8:51        4
## 3 10/19/2018 19:48        0
## 4   8/9/2018 13:21        4
## 5  12/6/2018 13:46        1
## 6   5/24/2018 9:11        1
##                                              Close.Type          city
## 1                                          WORKING CODE SILVER SPRING
## 2 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT      BETHESDA
## 3                       TRAFFIC/TRANSPORTATION INCIDENT SILVER SPRING
## 4 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT      BETHESDA
## 5                     SUSPICIOUS CIRC, PERSONS, VEHICLE    GERMANTOWN
## 6                                         CHECK WELFARE SILVER SPRING
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     4D                 98               209             503
## 2                     2D                 64              1564              NA
## 3                     3D                103               153             335
## 4                     2D                 67               796              NA
## 5                     1D                138               361            1192
## 6                     3D                204               314            1082
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1            13964                111             293          13461
## 2             4300               1500              NA             NA
## 3             3351                 50             181           3016
## 4             2063                729              NA             NA
## 5             6618                223             830           5426
## 6            28297                110             767          27215
##              datetime     time       date
## 1 2018-04-27 18:23:00 18:23:00 04/27/2018
## 2 2018-05-02 08:51:00 08:51:00 05/02/2018
## 3 2018-10-19 19:48:00 19:48:00 10/19/2018
## 4 2018-08-09 13:21:00 13:21:00 08/09/2018
## 5 2018-12-06 13:46:00 13:46:00 12/06/2018
## 6 2018-05-24 09:11:00 09:11:00 05/24/2018
# Reduce the 2018 records to severity (Priority) and hour of day. Columns are
# taken by name; the former positional drop (-c(1, 3:12, 14:15)) would silently
# keep the wrong columns if the layout of data2pc18 ever changed.
hmdfpc18 <- data2pc18["Priority"]
hmdfpc18$hour <- hour(data2pc18$datetime)
hmdfpc18$Priority <- as.numeric(hmdfpc18$Priority)
head(hmdfpc18)
##   Priority hour
## 1        1   18
## 2        4    8
## 3        0   19
## 4        4   13
## 5        1   13
## 6        1    9
# Cross-tabulate severity (rows) by hour of day (columns).
tbpc18 <- table(hmdfpc18$Priority, hmdfpc18$hour)
head(tbpc18)
##    
##        0    1    2    3    4    5    6    7    8    9   10   11   12   13   14
##   0  979  782  646  508  405  337  314  398  579  680  698  802  809  857  879
##   1 3708 2835 2234 1846 1666 1497 1438 2285 3124 3747 4265 4334 4674 4637 4524
##   2 1484 1012  828  641  576  515  620 1200 1780 2118 2132 2309 2584 2795 2727
##   3  877  570  378  245  175  113   90  246  372  503  642  684  673  775  792
##   4  609  371  244  174  179  182  253  774 1430 2060 2350 2390 2439 2565 2325
##    
##       15   16   17   18   19   20   21   22   23
##   0 1186 1172 1141 1281 1202 1131 1164 1192 1109
##   1 4522 5605 5235 5213 5296 5057 5127 4992 4379
##   2 2864 3852 3539 3593 3087 2501 2386 2235 1823
##   3  877  570  378  245  175  113   90  246  372  503  642  684  673  775  792
##   4 1932 2814 2445 2193 2145 1820 1529 1344  947
# Incidents per hour and a kernel density estimate of the hour values.
# NOTE(review): the variable `density` shares its name with the density()
# function; the name is kept in case later code refers to it.
hourtable <- table(hmdfpc18$hour)
density <- density(hmdfpc18$hour)
# Stacked bar chart of the severity-by-hour counts.
barplot(tbpc18)

# Long format (one row per severity/hour pair) with descriptive column names.
tbpc18df <- as.data.frame(tbpc18)
names(tbpc18df)[names(tbpc18df) == "Var1"] <- "severity"
names(tbpc18df)[names(tbpc18df) == "Var2"] <- "hour"
head(tbpc18df)
##   severity hour Freq
## 1        0    0  979
## 2        1    0 3708
## 3        2    0 1484
## 4        3    0  877
## 5        4    0  609
## 6        0    1  782
# Heat map of 2018 incident counts: hour of day on the x axis, severity on the
# y axis, tiles shaded white-to-blue by count and labelled with the count.
htpc18 <- ggplot(
  data = tbpc18df,
  mapping = aes(x = hour, y = severity, fill = Freq)
) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "blue") +
  geom_text(mapping = aes(label = Freq), size = 3) +
  labs(title = "2018 Hourly Crime per Severity (0 = most dangerous)")
htpc18

# Split each day into four six-hour windows.
# 2018 window 1 (Day1): 00:00 to 05:59
data2pc18td1 <- data2pc18 %>%
  filter(hour(datetime) >= 0, hour(datetime) < 6)
head(data2pc18td1)
##         End.Time Priority                                     Close.Type
## 1 8/21/2018 0:37        1                  HARASSMENT, STALKING, THREATS
## 2 4/23/2018 3:01        1              SUSPICIOUS CIRC, PERSONS, VEHICLE
## 3 1/15/2018 5:43        2                                  THEFT/LARCENY
## 4 2/27/2018 1:54        3              SEXUAL ASSAULT - OCCURRED EARLIER
## 5 4/19/2018 1:56        1                                   WORKING CODE
## 6 5/16/2018 0:04        3 ABUSE, ABANDONMENT, NEGLECT - OCCURRED EARLIER
##                 city Police.District.Number CallTime.CallRoute
## 1          ROCKVILLE                     1D                287
## 2 MONTGOMERY VILLAGE                     6D                 78
## 3      SILVER SPRING                     3D                 45
## 4      SILVER SPRING                     3D                299
## 5          ROCKVILLE                     4D                142
## 6          ROCKVILLE                     6D                120
##   Calltime.Dispatch Calltime.Arrive Calltime.Cleared CallRoute.Dispatch
## 1               417             461             3782                130
## 2               115             381             7722                 36
## 3               137             543             1722                 91
## 4               619            2261             6982                319
## 5               203             469            17545                 60
## 6               556            1135             7742                436
##   Dispatch.Arrive Arrive.Cleared            datetime     time       date
## 1              43           3321 2018-08-21 00:37:00 00:37:00 08/21/2018
## 2             266           7340 2018-04-23 03:01:00 03:01:00 04/23/2018
## 3             406           1179 2018-01-15 05:43:00 05:43:00 01/15/2018
## 4            1642           4721 2018-02-27 01:54:00 01:54:00 02/27/2018
## 5             265          17076 2018-04-19 01:56:00 01:56:00 04/19/2018
## 6             579           6606 2018-05-16 00:04:00 00:04:00 05/16/2018
# Reduce the 00:00-05:59 records to severity (Priority) and hour of day.
# Columns are taken by name; the former positional drop (-c(1, 3:12, 14:15))
# would silently keep the wrong columns if the source layout ever changed.
day1pc18 <- data2pc18td1["Priority"]
day1pc18$hour <- hour(data2pc18td1$datetime)
day1pc18$Priority <- as.numeric(day1pc18$Priority)
head(day1pc18)
##   Priority hour
## 1        1    0
## 2        1    3
## 3        2    5
## 4        3    1
## 5        1    1
## 6        3    0
# Cross-tabulate severity (rows) by hour of day (columns).
d1pc18table <- table(day1pc18$Priority, day1pc18$hour)
head(d1pc18table)
##    
##        0    1    2    3    4    5
##   0  979  782  646  508  405  337
##   1 3708 2835 2234 1846 1666 1497
##   2 1484 1012  828  641  576  515
##   3  877  570  378  245  175  113
##   4  609  371  244  174  179  182
# Incidents per hour, a kernel density estimate of the hour values, and a
# stacked bar chart of the severity-by-hour counts.
d1pc18hrtable <- table(day1pc18$hour)
d1pc18density <- density(day1pc18$hour)
barplot(d1pc18table)

# Long format (one row per severity/hour pair) with descriptive column names.
d1pc18tbdf <- as.data.frame(d1pc18table)
names(d1pc18tbdf)[names(d1pc18tbdf) == "Var1"] <- "severity"
names(d1pc18tbdf)[names(d1pc18tbdf) == "Var2"] <- "hour"
head(d1pc18tbdf)
##   severity hour Freq
## 1        0    0  979
## 2        1    0 3708
## 3        2    0 1484
## 4        3    0  877
## 5        4    0  609
## 6        0    1  782
# 2018 window 2 (Day2): 06:00 to 11:59
data2pc18td2 <- data2pc18 %>%
  filter(hour(datetime) >= 6, hour(datetime) < 12)
head(data2pc18td2)
##          End.Time Priority
## 1   5/2/2018 8:51        4
## 2  5/24/2018 9:11        1
## 3  3/16/2018 6:04        0
## 4  2/2/2018 10:25        1
## 5 8/25/2018 11:28        3
## 6  9/14/2018 8:31        2
##                                              Close.Type          city
## 1 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT      BETHESDA
## 2                                         CHECK WELFARE SILVER SPRING
## 3                                        SEXUAL ASSAULT  GAITHERSBURG
## 4                                                   CDS  GAITHERSBURG
## 5                     SEXUAL ASSAULT - OCCURRED EARLIER   CHEVY CHASE
## 6                   STABO - STABBING - OCCURRED EARLIER SILVER SPRING
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     2D                 64              1564              NA
## 2                     3D                204               314            1082
## 3                     6D                125               158             237
## 4                     1D                 52                NA              NA
## 5                     2D                 78              1808            2543
## 6                     3D                171               242            2312
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1             4300               1500              NA             NA
## 2            28297                110             767          27215
## 3            18239                 33              79          18001
## 4               52                 NA              NA             NA
## 5             5882               1729             735           3338
## 6            17428                 71            2070          15115
##              datetime     time       date
## 1 2018-05-02 08:51:00 08:51:00 05/02/2018
## 2 2018-05-24 09:11:00 09:11:00 05/24/2018
## 3 2018-03-16 06:04:00 06:04:00 03/16/2018
## 4 2018-02-02 10:25:00 10:25:00 02/02/2018
## 5 2018-08-25 11:28:00 11:28:00 08/25/2018
## 6 2018-09-14 08:31:00 08:31:00 09/14/2018
# Reduce the 06:00-11:59 records to severity (Priority) and hour of day.
# Columns are taken by name; the former positional drop (-c(1, 3:12, 14:15))
# would silently keep the wrong columns if the source layout ever changed.
day2pc18 <- data2pc18td2["Priority"]
day2pc18$hour <- hour(data2pc18td2$datetime)
day2pc18$Priority <- as.numeric(day2pc18$Priority)
head(day2pc18)
##   Priority hour
## 1        4    8
## 2        1    9
## 3        0    6
## 4        1   10
## 5        3   11
## 6        2    8
# Cross-tabulate severity (rows) by hour of day (columns).
d2pc18table <- table(day2pc18$Priority, day2pc18$hour)
head(d2pc18table)
##    
##        6    7    8    9   10   11
##   0  314  398  579  680  698  802
##   1 1438 2285 3124 3747 4265 4334
##   2  620 1200 1780 2118 2132 2309
##   3   90  246  372  503  642  684
##   4  253  774 1430 2060 2350 2390
# Incidents per hour, a kernel density estimate of the hour values, and a
# stacked bar chart of the severity-by-hour counts.
d2pc18hrtable <- table(day2pc18$hour)
d2pc18density <- density(day2pc18$hour)
barplot(d2pc18table)

# Long format (one row per severity/hour pair) with descriptive column names.
d2pc18tbdf <- as.data.frame(d2pc18table)
names(d2pc18tbdf)[names(d2pc18tbdf) == "Var1"] <- "severity"
names(d2pc18tbdf)[names(d2pc18tbdf) == "Var2"] <- "hour"
head(d2pc18tbdf)
##   severity hour Freq
## 1        0    6  314
## 2        1    6 1438
## 3        2    6  620
## 4        3    6   90
## 5        4    6  253
## 6        0    7  398
# 2018 window 3 (Day3): 12:00 to 17:59
data2pc18tn1 <- data2pc18 %>%
  filter(hour(datetime) >= 12, hour(datetime) < 18)
head(data2pc18tn1)
##           End.Time Priority
## 1   8/9/2018 13:21        4
## 2  12/6/2018 13:46        1
## 3 12/30/2018 15:06        4
## 4 10/10/2018 14:45        4
## 5  10/8/2018 13:44        4
## 6 10/28/2018 15:30        3
##                                              Close.Type          city
## 1 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT      BETHESDA
## 2                     SUSPICIOUS CIRC, PERSONS, VEHICLE    GERMANTOWN
## 3 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT  GAITHERSBURG
## 4 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT SILVER SPRING
## 5                    FRAUD/DECEPTION - OCCURRED EARLIER     ROCKVILLE
## 6      DOMESTIC DISTURBANCE/VIOLENCE - OCCURRED EARLIER     ROCKVILLE
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     2D                 67               796              NA
## 2                     1D                138               361            1192
## 3                     6D                301              2695              NA
## 4                     4D                136               154              NA
## 5                     1D                334               382            1766
## 6                     1D                 26              1130            2527
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1             2063                729              NA             NA
## 2             6618                223             830           5426
## 3             3829               2393              NA             NA
## 4             1982                 18              NA             NA
## 5             5235                 47            1384           3468
## 6             5211               1103            1397           2683
##              datetime     time       date
## 1 2018-08-09 13:21:00 13:21:00 08/09/2018
## 2 2018-12-06 13:46:00 13:46:00 12/06/2018
## 3 2018-12-30 15:06:00 15:06:00 12/30/2018
## 4 2018-10-10 14:45:00 14:45:00 10/10/2018
## 5 2018-10-08 13:44:00 13:44:00 10/08/2018
## 6 2018-10-28 15:30:00 15:30:00 10/28/2018
# Reduce the 12:00-17:59 records to severity (Priority) and hour of day.
# Columns are taken by name; the former positional drop (-c(1, 3:12, 14:15))
# would silently keep the wrong columns if the source layout ever changed.
day3pc18 <- data2pc18tn1["Priority"]
day3pc18$hour <- hour(data2pc18tn1$datetime)
day3pc18$Priority <- as.numeric(day3pc18$Priority)
head(day3pc18)
##   Priority hour
## 1        4   13
## 2        1   13
## 3        4   15
## 4        4   14
## 5        4   13
## 6        3   15
# Cross-tabulate severity (rows) by hour of day (columns).
d3pc18table <- table(day3pc18$Priority, day3pc18$hour)
head(d3pc18table)
##    
##       12   13   14   15   16   17
##   0  809  857  879 1186 1172 1141
##   1 4674 4637 4524 4522 5605 5235
##   2 2584 2795 2727 2864 3852 3539
##   3  673  775  792  744  884  929
##   4 2439 2565 2325 1932 2814 2445
# Incidents per hour, a kernel density estimate of the hour values, and a
# stacked bar chart of the severity-by-hour counts.
d3pc18hrtable <- table(day3pc18$hour)
d3pc18density <- density(day3pc18$hour)
barplot(d3pc18table)

# Long format (one row per severity/hour pair) with descriptive column names.
d3pc18tbdf <- as.data.frame(d3pc18table)
names(d3pc18tbdf)[names(d3pc18tbdf) == "Var1"] <- "severity"
names(d3pc18tbdf)[names(d3pc18tbdf) == "Var2"] <- "hour"
head(d3pc18tbdf)
##   severity hour Freq
## 1        0   12  809
## 2        1   12 4674
## 3        2   12 2584
## 4        3   12  673
## 5        4   12 2439
## 6        0   13  857
# 2018 window 4 (Day4): 18:00 to 23:59
data2pc18tn2 <- data2pc18 %>%
  filter(hour(datetime) >= 18, hour(datetime) < 24)
head(data2pc18tn2)
##           End.Time Priority                                       Close.Type
## 1  4/27/2018 18:23        1                                     WORKING CODE
## 2 10/19/2018 19:48        0                  TRAFFIC/TRANSPORTATION INCIDENT
## 3 10/12/2018 21:04        4                      BURGLARY - OCCURRED EARLIER
## 4  5/23/2018 18:53        0 DOMESTIC DISTURBANCE/VIOLENCE - OCCURRED EARLIER
## 5   6/6/2018 22:34        0                           BURGLARY JUST OCCURRED
## 6   5/3/2018 18:08        2                                 STATION RESPONSE
##            city Police.District.Number CallTime.CallRoute Calltime.Dispatch
## 1 SILVER SPRING                     4D                 98               209
## 2 SILVER SPRING                     3D                103               153
## 3       POTOMAC                     1D                135               758
## 4     ROCKVILLE                     1D                143               195
## 5     ROCKVILLE                     4D                 64               174
## 6      BETHESDA                     2D                133               534
##   Calltime.Arrive Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive
## 1             503            13964                111             293
## 2             335             3351                 50             181
## 3             769            17419                623              10
## 4             214            10384                 52              18
## 5             919             7074                110             745
## 6             674             5400                401             139
##   Arrive.Cleared            datetime     time       date
## 1          13461 2018-04-27 18:23:00 18:23:00 04/27/2018
## 2           3016 2018-10-19 19:48:00 19:48:00 10/19/2018
## 3          16650 2018-10-12 21:04:00 21:04:00 10/12/2018
## 4          10169 2018-05-23 18:53:00 18:53:00 05/23/2018
## 5           6154 2018-06-06 22:34:00 22:34:00 06/06/2018
## 6           4725 2018-05-03 18:08:00 18:08:00 05/03/2018
# Reduce the 18:00-23:59 records to severity (Priority) and hour of day.
# Columns are taken by name; the former positional drop (-c(1, 3:12, 14:15))
# would silently keep the wrong columns if the source layout ever changed.
day4pc18 <- data2pc18tn2["Priority"]
day4pc18$hour <- hour(data2pc18tn2$datetime)
day4pc18$Priority <- as.numeric(day4pc18$Priority)
head(day4pc18)
##   Priority hour
## 1        1   18
## 2        0   19
## 3        4   21
## 4        0   18
## 5        0   22
## 6        2   18
# Cross-tabulate severity (rows) by hour of day (columns).
d4pc18table <- table(day4pc18$Priority, day4pc18$hour)
head(d4pc18table)
##    
##       18   19   20   21   22   23
##   0 1281 1202 1131 1164 1192 1109
##   1 5213 5296 5057 5127 4992 4379
##   2 3593 3087 2501 2386 2235 1823
##   3  922  880  881 1061 1246 1091
##   4 2193 2145 1820 1529 1344  947
# Incidents per hour, a kernel density estimate of the hour values, and a
# stacked bar chart of the severity-by-hour counts.
d4pc18hrtable <- table(day4pc18$hour)
d4pc18density <- density(day4pc18$hour)
barplot(d4pc18table)

# Long format (one row per severity/hour pair) with descriptive column names.
d4pc18tbdf <- as.data.frame(d4pc18table)
names(d4pc18tbdf)[names(d4pc18tbdf) == "Var1"] <- "severity"
names(d4pc18tbdf)[names(d4pc18tbdf) == "Var2"] <- "hour"
head(d4pc18tbdf)
##   severity hour Freq
## 1        0   18 1281
## 2        1   18 5213
## 3        2   18 3593
## 4        3   18  922
## 5        4   18 2193
## 6        0   19 1202

Combine Day1, Day2, Day3, and Day4 into a single 2018 dataset

# Stack the four six-hour windows into one long severity/hour/Freq data frame.
day1234pc18 <- rbind(d1pc18tbdf, d2pc18tbdf, d3pc18tbdf, d4pc18tbdf)
head(day1234pc18)
##   severity hour Freq
## 1        0    0  979
## 2        1    0 3708
## 3        2    0 1484
## 4        3    0  877
## 5        4    0  609
## 6        0    1  782
# Hourly incident frequency for 2018, one line (and one facet) per severity.
# The `text` aesthetic on geom_point() is not a ggplot2 aesthetic (hence the
# warning below); it is there to supply the tooltip used by ggplotly().
# NOTE(review): `labels` is not defined in this section - presumably a vector
# of hour labels created earlier in the file; confirm before reusing this code.
day1234pc18gg <- ggplot(
  day1234pc18,
  aes(
    x = hour, y = Freq, group = severity, shape = severity,
    color = severity
  )
) +
  geom_line() +
  geom_point(
    aes(
      text = paste(
        paste("Hour: ", hour, "<br>"),
        paste("Frequency: ", Freq, "<br>"),
        paste("Severity: ", severity)
      )
    ),
    size = 2,
    data = day1234pc18
  ) +
  ggtitle("2018 Hourly Frequency of Severity") +
  theme(axis.text.x = element_text(size = 5)) +
  scale_x_discrete("Hour", breaks = labels, labels = labels) +
  scale_y_continuous("Frequency") +
  facet_grid(. ~ severity)
## Warning: Ignoring unknown aesthetics: text
day1234pc18gg

# Interactive version showing only the custom tooltip text, with the plotly
# mode bar hidden. FALSE is spelled out because F is an ordinary variable that
# can be reassigned.
day1234pc18gp <- ggplotly(day1234pc18gg, tooltip = "text") %>%
  config(displayModeBar = FALSE)
day1234pc18gp
# Hourly incident frequency for 2018 with all severities overlaid in a single
# panel (no faceting). The `text` aesthetic on geom_point() is not a ggplot2
# aesthetic (hence the warning below); it supplies the ggplotly() tooltip.
# NOTE(review): `labels` is not defined in this section - presumably a vector
# of hour labels created earlier in the file; confirm before reusing this code.
day1234pc18gg2 <- ggplot(
  day1234pc18,
  aes(
    x = hour, y = Freq, group = severity, shape = severity,
    color = severity
  )
) +
  geom_line() +
  geom_point(
    aes(
      text = paste(
        paste("Hour: ", hour, "<br>"),
        paste("Frequency: ", Freq, "<br>"),
        paste("Severity: ", severity)
      )
    ),
    size = 2.5,
    data = day1234pc18
  ) +
  ggtitle("2018 Hourly Frequency of Severity") +
  theme(axis.text.x = element_text(size = 10)) +
  scale_x_discrete("Hour", breaks = labels, labels = labels) +
  scale_y_continuous("Frequency")
## Warning: Ignoring unknown aesthetics: text
day1234pc18gg2

# Interactive version showing only the custom tooltip text, with the plotly
# mode bar hidden. FALSE is spelled out because F is an ordinary variable that
# can be reassigned.
day1234pc18gp2 <- ggplotly(day1234pc18gg2, tooltip = "text") %>%
  config(displayModeBar = FALSE)
day1234pc18gp2
# 2018 counts with cities as rows and priority levels as columns; the
# dimension labels are supplied at construction time through dnn.
data2pc18tb <- table(
  data2pc18$city,
  data2pc18$Priority,
  dnn = c("city", "Priority")
)
head(data2pc18tb)
##                      Priority
## city                     0    1    2    3    4
##   ASHTON-SANDY SPRING   40  162   90   20   42
##   BARNESVILLE            2   14   14    4    3
##   BEALLSVILLE            7   15   17    2    3
##   BETHESDA            1251 7485 3958  940 2920
##   BOYDS                185  516  241   79  190
##   BRINKLOW              13   46   21    6    9
# Join the 2018 city-by-priority counts with the population table. merge()
# turns the contingency table into long form (city, Priority, Freq) first;
# all = TRUE keeps rows that have no match on either side.
data2318tb <- merge(x = data2pc18tb, y = data3popc, all = TRUE)
# Incidents per 100 residents (Freq / Census.2010 * 100), rounded to 1 decimal.
data2318tb$rate <- with(
  data2318tb,
  round((Freq / Census.2010) * 100, digits = 1)
)
head(data2318tb)
##                  city Priority Freq Census.2010 rate
## 1 ASHTON-SANDY SPRING        0   40        5628  0.7
## 2 ASHTON-SANDY SPRING        3   20        5628  0.4
## 3 ASHTON-SANDY SPRING        2   90        5628  1.6
## 4 ASHTON-SANDY SPRING        4   42        5628  0.7
## 5 ASHTON-SANDY SPRING        1  162        5628  2.9
## 6         BARNESVILLE        0    2         172  1.2
# Total 2018 incidents per city: sum each city row across all priority levels.
data2pc18sum <- rowSums(data2pc18tb)
head(data2pc18sum)
## ASHTON-SANDY SPRING         BARNESVILLE         BEALLSVILLE            BETHESDA 
##                 354                  37                  44               16554 
##               BOYDS            BRINKLOW 
##                1211                  95
summary(data2pc18sum)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      37     122    1970    8519    7024   71850
# Per-city totals as a data frame (columns Total and city, row names = city).
# Built directly instead of copying columns and dropping the first one. The
# former names(dimnames(df)) <- ... step was removed: on a data frame it only
# re-assigns the existing row and column names, so it never renamed anything.
data2pc18sumdf <- data.frame(
  Total = unname(data2pc18sum),
  city = names(data2pc18sum),
  row.names = names(data2pc18sum),
  stringsAsFactors = FALSE
)
head(data2pc18sumdf)
##                     Total                city
## ASHTON-SANDY SPRING   354 ASHTON-SANDY SPRING
## BARNESVILLE            37         BARNESVILLE
## BEALLSVILLE            44         BEALLSVILLE
## BETHESDA            16554            BETHESDA
## BOYDS                1211               BOYDS
## BRINKLOW               95            BRINKLOW
# 2018 counts with priority levels as rows and cities as columns; the
# dimension labels are supplied at construction time through dnn.
data2pc18tb2 <- table(
  data2pc18$Priority,
  data2pc18$city,
  dnn = c("Priority", "city")
)
head(data2pc18tb2)
##         city
## Priority ASHTON-SANDY SPRING BARNESVILLE BEALLSVILLE BETHESDA BOYDS BRINKLOW
##        0                  40           2           7     1251   185       13
##        1                 162          14          15     7485   516       46
##        2                  90          14          17     3958   241       21
##        3                  20           4           2      940    79        6
##        4                  42           3           3     2920   190        9
##         city
## Priority CABIN JOHN CHEVY CHASE CLARKSBURG DAMASCUS DERWOOD DICKERSON
##        0         19         349        265      216     435        29
##        1        103        2429       1556      898    1616       164
##        2         40        1214        592      387    1090       120
##        3         34         248        241      176     339        51
##        4         56        1056        634      293     513        47
##         city
## Priority GAITHERSBURG GARRETT PARK GERMANTOWN GLEN ECHO KENSINGTON
##        0         2944            5       2238         6        339
##        1        12965           48       8803        30       1671
##        2         6977           19       4465        17        713
##        3         2360            6       1624         4        279
##        4         4975           26       3072         8        623
##         city
## Priority MONTGOMERY VILLAGE OLNEY POOLESVILLE POTOMAC ROCKVILLE SILVER SPRING
##        0                837   327          73     453      2774          7422
##        1               3218  1770         361    3888     15190         29210
##        2               1126   691         147    1402      7300         18529
##        3                681   342          71     441      2699          5116
##        4               1162   620         123    1098      6442         11573
##         city
## Priority SPENCERVILLE WASHINGTON GROVE
##        0           19                3
##        1           58               24
##        2           25                6
##        3            6                4
##        4           14               12
# Total 2018 incidents per priority level: sum each priority row across cities.
data2pc18sum2 <- rowSums(data2pc18tb2)
head(data2pc18sum2)
##     0     1     2     3     4 
## 20251 92240 49201 15773 35514
summary(data2pc18sum2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15773   20251   35514   42596   49201   92240
# Per-severity totals as a data frame (columns Total and Severity, row names =
# priority level). Built directly: the previous version read the totals via
# `$data2pc18sum`, which only worked because `$` partially matched the real
# column name `data2pc18sum2`, and its names(dimnames(df)) <- ... step had no
# effect on a data frame.
data2pc18sumdf2 <- data.frame(
  Total = unname(data2pc18sum2),
  Severity = names(data2pc18sum2),
  row.names = names(data2pc18sum2),
  stringsAsFactors = FALSE
)
head(data2pc18sumdf2)
##   Total Severity
## 0 20251        0
## 1 92240        1
## 2 49201        2
## 3 15773        3
## 4 35514        4
# Convert the city-by-priority table to a long data frame and keep only the
# Priority and Freq columns (the city column is dropped).
data2pc18df <- as.data.frame(data2pc18tb)[c("Priority", "Freq")]
head(data2pc18df)
##   Priority Freq
## 1        0   40
## 2        0    2
## 3        0    7
## 4        0 1251
## 5        0  185
## 6        0   13
# Boxplot of the per-city 2018 incident counts for each severity (priority)
# level: five-number summary, with outliers drawn as open black circles.
pcbox18 <- ggplot(
  data2pc18df,
  aes(x = Priority, y = Freq, fill = Priority)
) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 2) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.9)) +
  ggtitle("2018 Crime Count of Severity") +
  xlab("Severity") +
  ylab("Count") +
  theme(legend.position = "right") +
  labs(fill = "Severity")
pcbox18

# Interactive version with the plotly mode bar hidden. FALSE is spelled out
# because F is an ordinary variable that can be reassigned.
pcbox18gg <- ggplotly(pcbox18) %>% config(displayModeBar = FALSE)
pcbox18gg
# Same five-number/outlier view as the count boxplot, but using the
# population-adjusted rate (incidents per 100 residents) instead of raw counts.
head(data2318tb, 10)
##                   city Priority Freq Census.2010 rate
## 1  ASHTON-SANDY SPRING        0   40        5628  0.7
## 2  ASHTON-SANDY SPRING        3   20        5628  0.4
## 3  ASHTON-SANDY SPRING        2   90        5628  1.6
## 4  ASHTON-SANDY SPRING        4   42        5628  0.7
## 5  ASHTON-SANDY SPRING        1  162        5628  2.9
## 6          BARNESVILLE        0    2         172  1.2
## 7          BARNESVILLE        2   14         172  8.1
## 8          BARNESVILLE        4    3         172  1.7
## 9          BARNESVILLE        1   14         172  8.1
## 10         BARNESVILLE        3    4         172  2.3
data23box18 <- ggplot(
  data2318tb,
  aes(x = Priority, y = rate, fill = Priority)
) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 2) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.9)) +
  ggtitle("2018 Crime Rate by Severity") +
  xlab("Severity") +
  ylab("Rate") +
  theme(legend.position = "right") +
  labs(fill = "Severity")
data23box18

# Interactive version with the plotly mode bar hidden. FALSE is spelled out
# because F is an ordinary variable that can be reassigned.
data23box18gg <- ggplotly(data23box18) %>% config(displayModeBar = FALSE)
data23box18gg

- Police Dispatched Incidents 2019

Split the year and time

# 2019, whole year: keep the rows whose datetime falls in 2019
data2pc19 <- data2pc %>%
  filter(year(datetime) == 2019)
head(data2pc19)
##          End.Time Priority                      Close.Type          city
## 1 9/14/2019 14:29        0 TRAFFIC/TRANSPORTATION INCIDENT    GERMANTOWN
## 2 10/22/2019 8:25        2 TRAFFIC/TRANSPORTATION INCIDENT SILVER SPRING
## 3   9/8/2019 3:47        2             ASSIST OTHER AGENCY      DAMASCUS
## 4  3/21/2019 5:59        0                   URGENT ASSIST     ROCKVILLE
## 5 8/14/2019 19:59        2               TRAFFIC VIOLATION     ROCKVILLE
## 6  2/24/2019 0:56        2 TRAFFIC/TRANSPORTATION INCIDENT      BETHESDA
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     5D                 99               118             347
## 2                     3D                 92               282             627
## 3                     5D                  0                77             604
## 4                     4D                  0                44             461
## 5                     1D                120               575             788
## 6                     2D                402              1354            1513
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1             4296                 18             228           3949
## 2             1801                189             345           1174
## 3              628                 77             527             24
## 4            35782                 44             416          35321
## 5             1227                455             212            439
## 6             4237                952             159           2723
##              datetime     time       date
## 1 2019-09-14 14:29:00 14:29:00 09/14/2019
## 2 2019-10-22 08:25:00 08:25:00 10/22/2019
## 3 2019-09-08 03:47:00 03:47:00 09/08/2019
## 4 2019-03-21 05:59:00 05:59:00 03/21/2019
## 5 2019-08-14 19:59:00 19:59:00 08/14/2019
## 6 2019-02-24 00:56:00 00:56:00 02/24/2019
# Reduce the 2019 incidents to the two fields used by the hourly analysis:
# numeric Priority and the hour of day taken from datetime.
# Columns are picked by name instead of dropping hard-coded positions, so the
# result no longer depends on the column order of data2pc19.
hmdfpc19 <- data2pc19["Priority"]
hmdfpc19$Priority <- as.numeric(hmdfpc19$Priority)
hmdfpc19$hour <- hour(data2pc19$datetime)
head(hmdfpc19)
##   Priority hour
## 1        0   14
## 2        2    8
## 3        2    3
## 4        0    5
## 5        2   19
## 6        2    0
# Cross-tabulate Priority (rows) against hour of day (columns)
tbpc19 <- table(hmdfpc19[["Priority"]], hmdfpc19[["hour"]])
head(tbpc19)
##    
##        0    1    2    3    4    5    6    7    8    9   10   11   12   13   14
##   0  984  755  598  555  413  331  313  352  465  607  676  695  796  849  809
##   1 3500 2745 2212 1816 1573 1367 1280 2202 2908 3476 3796 4029 4251 4290 4152
##   2 1279  903  712  565  522  481  567 1102 1707 1982 2024 2158 2451 2805 2690
##   3 1093  765  505  347  252  152  104  241  394  410  537  480  504  647  588
##   4  583  362  248  169  193  181  228  878 1488 2066 2533 2560 2499 2587 2444
##    
##       15   16   17   18   19   20   21   22   23
##   0 1124 1074 1141 1189 1241 1077 1166 1167 1043
##   1 4176 5319 5000 4972 4876 4902 4978 5092 4071
##   2 2776 3842 3469 3332 3004 2455 2213 2094 1617
##   3  577  725  758  737  751  763 1132 1664 1446
##   4 2167 2998 2719 2190 2031 1795 1591 1443  825
# Counts per hour and a density estimate of the hour values.
# NOTE(review): the variable `density` shares its name with the function it
# is computed with; a distinct name would read more clearly.
hourtable <- table(hmdfpc19$hour)
density <- density(hmdfpc19$hour)
# Bar plot of the Priority x hour table
barplot(tbpc19)

# Long format: one row per Priority/hour combination, with the default
# Var1/Var2 column names replaced by descriptive ones
tbpc19df <- as.data.frame(tbpc19)
names(tbpc19df) <- sub("^Var1$", "severity", names(tbpc19df))
names(tbpc19df) <- sub("^Var2$", "hour", names(tbpc19df))
head(tbpc19df)
##   severity hour Freq
## 1        0    0  984
## 2        1    0 3500
## 3        2    0 1279
## 4        3    0 1093
## 5        4    0  583
## 6        0    1  755
# Heat map of the 2019 counts: hour on x, severity on y. The tile shade
# (white to red) and the printed label are both taken from Freq.
htpc19 <- ggplot(tbpc19df, aes(x = hour, y = severity, fill = Freq)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "red") +
  geom_text(aes(label = Freq), size = 3) +
  labs(title = "2019 Hourly Crime per Severity (0 = most dangerous)")
htpc19

# Split the day into four six-hour windows
# 2019 Day1: 00:00 to 05:59
data2pc19td1 <- data2pc19 %>%
  filter(hour(datetime) >= 0, hour(datetime) < 6)
head(data2pc19td1)
##          End.Time Priority                      Close.Type          city
## 1   9/8/2019 3:47        2             ASSIST OTHER AGENCY      DAMASCUS
## 2  3/21/2019 5:59        0                   URGENT ASSIST     ROCKVILLE
## 3  2/24/2019 0:56        2 TRAFFIC/TRANSPORTATION INCIDENT      BETHESDA
## 4 10/18/2019 0:56        1                   CHECK WELFARE  GAITHERSBURG
## 5 11/24/2019 2:48        1 ASSAULT JUST OCCURRED - ROUTINE  GAITHERSBURG
## 6  5/17/2019 1:14        0   DOMESTIC DISTURBANCE/VIOLENCE SILVER SPRING
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     5D                  0                77             604
## 2                     4D                  0                44             461
## 3                     2D                402              1354            1513
## 4                     6D                646               811            1121
## 5                     6D                401               537             899
## 6                     3D                 42                99             208
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1              628                 77             527             24
## 2            35782                 44             416          35321
## 3             4237                952             159           2723
## 4             1496                164             310            375
## 5             2464                136             361           1565
## 6             9544                 56             108           9336
##              datetime     time       date
## 1 2019-09-08 03:47:00 03:47:00 09/08/2019
## 2 2019-03-21 05:59:00 05:59:00 03/21/2019
## 3 2019-02-24 00:56:00 00:56:00 02/24/2019
## 4 2019-10-18 00:56:00 00:56:00 10/18/2019
## 5 2019-11-24 02:48:00 02:48:00 11/24/2019
## 6 2019-05-17 01:14:00 01:14:00 05/17/2019
# Day1 window reduced to numeric Priority plus the hour of day from datetime.
# Columns are picked by name instead of dropping hard-coded positions, so the
# result no longer depends on the column order of data2pc19td1.
day1pc19 <- data2pc19td1["Priority"]
day1pc19$Priority <- as.numeric(day1pc19$Priority)
day1pc19$hour <- hour(data2pc19td1$datetime)
head(day1pc19)
##   Priority hour
## 1        2    3
## 2        0    5
## 3        2    0
## 4        1    0
## 5        1    2
## 6        0    1
# Day1: Priority (rows) by hour (columns)
d1pc19table <- table(day1pc19[["Priority"]], day1pc19[["hour"]])
head(d1pc19table)
##    
##        0    1    2    3    4    5
##   0  984  755  598  555  413  331
##   1 3500 2745 2212 1816 1573 1367
##   2 1279  903  712  565  522  481
##   3 1093  765  505  347  252  152
##   4  583  362  248  169  193  181
# Day1: counts per hour, density estimate of the hour values, and a bar plot
# of the Priority x hour table
d1pc19hrtable <- table(day1pc19$hour)
d1pc19density <- density(day1pc19$hour)
barplot(d1pc19table)

# Long format with the default Var1/Var2 names replaced by severity/hour
d1pc19tbdf <- as.data.frame(d1pc19table)
names(d1pc19tbdf) <- sub("^Var1$", "severity", names(d1pc19tbdf))
names(d1pc19tbdf) <- sub("^Var2$", "hour", names(d1pc19tbdf))
head(d1pc19tbdf)
##   severity hour Freq
## 1        0    0  984
## 2        1    0 3500
## 3        2    0 1279
## 4        3    0 1093
## 5        4    0  583
## 6        0    1  755
# 2019 Day2: 06:00 to 11:59
data2pc19td2 <- data2pc19 %>%
  filter(hour(datetime) >= 6, hour(datetime) < 12)
head(data2pc19td2)
##          End.Time Priority
## 1 10/22/2019 8:25        2
## 2  5/18/2019 8:22        1
## 3 5/30/2019 10:09        4
## 4 5/30/2019 11:19        4
## 5 5/15/2019 11:19        1
## 6 5/15/2019 10:13        4
##                                              Close.Type          city
## 1                       TRAFFIC/TRANSPORTATION INCIDENT SILVER SPRING
## 2                                         CHECK WELFARE SILVER SPRING
## 3                      THEFT/LARCENY - OCCURRED EARLIER     ROCKVILLE
## 4 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT     ROCKVILLE
## 5                     SUSPICIOUS CIRC, PERSONS, VEHICLE SILVER SPRING
## 6    TRAFFIC/TRANSPORTATION INCIDENT - OCCURRED EARLIER    GERMANTOWN
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     3D                 92               282             627
## 2                     3D                416               756            1204
## 3                     1D                215               365            1429
## 4                     6D                139               350              NA
## 5                     3D                 60               326             484
## 6                     5D                221               404            1276
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1             1801                189             345           1174
## 2             2817                339             448           1612
## 3             4260                150            1064           2830
## 4             2182                211              NA             NA
## 5             5468                265             158           4984
## 6             3241                182             871           1964
##              datetime     time       date
## 1 2019-10-22 08:25:00 08:25:00 10/22/2019
## 2 2019-05-18 08:22:00 08:22:00 05/18/2019
## 3 2019-05-30 10:09:00 10:09:00 05/30/2019
## 4 2019-05-30 11:19:00 11:19:00 05/30/2019
## 5 2019-05-15 11:19:00 11:19:00 05/15/2019
## 6 2019-05-15 10:13:00 10:13:00 05/15/2019
# Day2 window reduced to numeric Priority plus the hour of day from datetime.
# Columns are picked by name instead of dropping hard-coded positions, so the
# result no longer depends on the column order of data2pc19td2.
day2pc19 <- data2pc19td2["Priority"]
day2pc19$Priority <- as.numeric(day2pc19$Priority)
day2pc19$hour <- hour(data2pc19td2$datetime)
head(day2pc19)
##   Priority hour
## 1        2    8
## 2        1    8
## 3        4   10
## 4        4   11
## 5        1   11
## 6        4   10
# Day2: Priority (rows) by hour (columns)
d2pc19table <- table(day2pc19[["Priority"]], day2pc19[["hour"]])
head(d2pc19table)
##    
##        6    7    8    9   10   11
##   0  313  352  465  607  676  695
##   1 1280 2202 2908 3476 3796 4029
##   2  567 1102 1707 1982 2024 2158
##   3  104  241  394  410  537  480
##   4  228  878 1488 2066 2533 2560
# Day2: counts per hour, density estimate of the hour values, and a bar plot
# of the Priority x hour table
d2pc19hrtable <- table(day2pc19$hour)
d2pc19density <- density(day2pc19$hour)
barplot(d2pc19table)

# Long format with the default Var1/Var2 names replaced by severity/hour
d2pc19tbdf <- as.data.frame(d2pc19table)
names(d2pc19tbdf) <- sub("^Var1$", "severity", names(d2pc19tbdf))
names(d2pc19tbdf) <- sub("^Var2$", "hour", names(d2pc19tbdf))
head(d2pc19tbdf)
##   severity hour Freq
## 1        0    6  313
## 2        1    6 1280
## 3        2    6  567
## 4        3    6  104
## 5        4    6  228
## 6        0    7  352
# 2019 Day3: 12:00 to 17:59
data2pc19tn1 <- data2pc19 %>%
  filter(hour(datetime) >= 12, hour(datetime) < 18)
head(data2pc19tn1)
##           End.Time Priority                                     Close.Type
## 1  9/14/2019 14:29        0                TRAFFIC/TRANSPORTATION INCIDENT
## 2  9/29/2019 15:21        3                     ROBBERY - OCCURRED EARLIER
## 3   5/8/2019 15:13        2                                  THEFT/LARCENY
## 4  1/14/2019 13:42        4               THEFT/LARCENY - OCCURRED EARLIER
## 5  7/31/2019 15:37        4 VANDALISM, DAMAGE, MISCHIEF - OCCURRED EARLIER
## 6 10/21/2019 15:08        4             FRAUD/DECEPTION - OCCURRED EARLIER
##            city Police.District.Number CallTime.CallRoute Calltime.Dispatch
## 1    GERMANTOWN                     5D                 99               118
## 2 SILVER SPRING                     3D                 96               632
## 3    GERMANTOWN                     5D                 63               123
## 4 SILVER SPRING                     3D                385              1582
## 5    KENSINGTON                     4D                214              2728
## 6         BOYDS                     5D                337               590
##   Calltime.Arrive Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive
## 1             347             4296                 18             228
## 2             754            11365                536             121
## 3             790             4847                 60             666
## 4            2087             4768               1197             505
## 5            2944             3684               2514             215
## 6             939             2611                252             348
##   Arrive.Cleared            datetime     time       date
## 1           3949 2019-09-14 14:29:00 14:29:00 09/14/2019
## 2          10610 2019-09-29 15:21:00 15:21:00 09/29/2019
## 3           4057 2019-05-08 15:13:00 15:13:00 05/08/2019
## 4           2680 2019-01-14 13:42:00 13:42:00 01/14/2019
## 5            740 2019-07-31 15:37:00 15:37:00 07/31/2019
## 6           1672 2019-10-21 15:08:00 15:08:00 10/21/2019
# Day3 window reduced to numeric Priority plus the hour of day from datetime.
# Columns are picked by name instead of dropping hard-coded positions, so the
# result no longer depends on the column order of data2pc19tn1.
day3pc19 <- data2pc19tn1["Priority"]
day3pc19$Priority <- as.numeric(day3pc19$Priority)
day3pc19$hour <- hour(data2pc19tn1$datetime)
head(day3pc19)
##   Priority hour
## 1        0   14
## 2        3   15
## 3        2   15
## 4        4   13
## 5        4   15
## 6        4   15
# Day3: Priority (rows) by hour (columns)
d3pc19table <- table(day3pc19[["Priority"]], day3pc19[["hour"]])
head(d3pc19table)
##    
##       12   13   14   15   16   17
##   0  796  849  809 1124 1074 1141
##   1 4251 4290 4152 4176 5319 5000
##   2 2451 2805 2690 2776 3842 3469
##   3  504  647  588  577  725  758
##   4 2499 2587 2444 2167 2998 2719
# Day3: counts per hour, density estimate of the hour values, and a bar plot
# of the Priority x hour table
d3pc19hrtable <- table(day3pc19$hour)
d3pc19density <- density(day3pc19$hour)
barplot(d3pc19table)

# Long format with the default Var1/Var2 names replaced by severity/hour
d3pc19tbdf <- as.data.frame(d3pc19table)
names(d3pc19tbdf) <- sub("^Var1$", "severity", names(d3pc19tbdf))
names(d3pc19tbdf) <- sub("^Var2$", "hour", names(d3pc19tbdf))
head(d3pc19tbdf)
##   severity hour Freq
## 1        0   12  796
## 2        1   12 4251
## 3        2   12 2451
## 4        3   12  504
## 5        4   12 2499
## 6        0   13  849
# 2019 Day4: 18:00 to 23:59
data2pc19tn2 <- data2pc19 %>%
  filter(hour(datetime) >= 18, hour(datetime) < 24)
head(data2pc19tn2)
##          End.Time Priority
## 1 8/14/2019 19:59        2
## 2  4/9/2019 19:26        4
## 3  5/7/2019 21:11        4
## 4  5/8/2019 23:37        1
## 5 5/30/2019 18:01        4
## 6 5/15/2019 20:29        2
##                                              Close.Type          city
## 1                                     TRAFFIC VIOLATION     ROCKVILLE
## 2        VANDALISM, DAMAGE, MISCHIEF - OCCURRED EARLIER    GERMANTOWN
## 3 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT  GAITHERSBURG
## 4                        MISSING, RUNAWAY, FOUND PERSON  GAITHERSBURG
## 5                      THEFT/LARCENY - OCCURRED EARLIER     ROCKVILLE
## 6                       THEFT/LARCENY - HOLDING SUSPECT SILVER SPRING
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     1D                120               575             788
## 2                     5D                182               297            1318
## 3                     6D                184               612              NA
## 4                     6D                245               930            1667
## 5                     2D                308               714            1270
## 6                     3D                 65               103            5750
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1             1227                455             212            439
## 2             4769                114            1020           3450
## 3             1313                427              NA             NA
## 4             9964                685             737           8296
## 5             2702                406             555           1432
## 6             7455                 38            5646           1705
##              datetime     time       date
## 1 2019-08-14 19:59:00 19:59:00 08/14/2019
## 2 2019-04-09 19:26:00 19:26:00 04/09/2019
## 3 2019-05-07 21:11:00 21:11:00 05/07/2019
## 4 2019-05-08 23:37:00 23:37:00 05/08/2019
## 5 2019-05-30 18:01:00 18:01:00 05/30/2019
## 6 2019-05-15 20:29:00 20:29:00 05/15/2019
# Day4 window reduced to numeric Priority plus the hour of day from datetime.
# Columns are picked by name instead of dropping hard-coded positions, so the
# result no longer depends on the column order of data2pc19tn2.
day4pc19 <- data2pc19tn2["Priority"]
day4pc19$Priority <- as.numeric(day4pc19$Priority)
day4pc19$hour <- hour(data2pc19tn2$datetime)
head(day4pc19)
##   Priority hour
## 1        2   19
## 2        4   19
## 3        4   21
## 4        1   23
## 5        4   18
## 6        2   20
# Day4: Priority (rows) by hour (columns)
d4pc19table <- table(day4pc19[["Priority"]], day4pc19[["hour"]])
head(d4pc19table)
##    
##       18   19   20   21   22   23
##   0 1189 1241 1077 1166 1167 1043
##   1 4972 4876 4902 4978 5092 4071
##   2 3332 3004 2455 2213 2094 1617
##   3  737  751  763 1132 1664 1446
##   4 2190 2031 1795 1591 1443  825
# Day4: counts per hour, density estimate of the hour values, and a bar plot
# of the Priority x hour table
d4pc19hrtable <- table(day4pc19$hour)
d4pc19density <- density(day4pc19$hour)
barplot(d4pc19table)

# Long format with the default Var1/Var2 names replaced by severity/hour
d4pc19tbdf <- as.data.frame(d4pc19table)
names(d4pc19tbdf) <- sub("^Var1$", "severity", names(d4pc19tbdf))
names(d4pc19tbdf) <- sub("^Var2$", "hour", names(d4pc19tbdf))
head(d4pc19tbdf)
##   severity hour Freq
## 1        0   18 1189
## 2        1   18 4972
## 3        2   18 3332
## 4        3   18  737
## 5        4   18 2190
## 6        0   19 1241

Combine Day1, Day2, Day3, and Day4 of the 2019 data

# Stack the four six-hour windows back into one long table covering all hours
day1234pc19 <- rbind(d1pc19tbdf, d2pc19tbdf, d3pc19tbdf, d4pc19tbdf)
head(day1234pc19)
##   severity hour Freq
## 1        0    0  984
## 2        1    0 3500
## 3        2    0 1279
## 4        3    0 1093
## 5        4    0  583
## 6        0    1  755
# Line chart of the 2019 hourly counts, one facet per severity level.
# The tooltip text is built from bare column names. Writing
# `day1234pc19$col` inside aes() triggered ggplot2's
# "Use of `df$col` is discouraged" warnings.
day1234pc19gg <- ggplot(
  day1234pc19,
  aes(
    x = hour, y = Freq, group = severity, shape = severity,
    color = severity
  )
) +
  geom_line() +
  # `text` is not a ggplot2 aesthetic (hence the warning printed below); it is
  # consumed by ggplotly(tooltip = "text") further down.
  geom_point(
    aes(text = paste(
      paste("Hour: ", hour, "<br>"),
      paste("Frequency: ", Freq, "<br>"),
      paste("Severity: ", severity)
    )),
    size = 2,
    data = day1234pc19
  ) +
  ggtitle("2019 Hourly Frequency of Severity") +
  theme(axis.text.x = element_text(size = 5)) +
  # NOTE(review): `labels` is not defined in this section; presumably it is
  # created earlier in the file -- confirm.
  scale_x_discrete("Hour", breaks = labels, labels = labels) +
  scale_y_continuous("Frequency") +
  facet_grid(. ~ severity)
## Warning: Ignoring unknown aesthetics: text
day1234pc19gg

# Interactive version showing only the custom tooltip text. FALSE is spelled
# out because `F` is an ordinary variable that can be reassigned.
day1234pc19gp <- ggplotly(day1234pc19gg, tooltip = "text") %>%
  config(displayModeBar = FALSE)
day1234pc19gp
# Static (non-animated) variant: all severity levels share a single panel
day1234pc19gg2 <- ggplot(
  day1234pc19,
  aes(
    x = hour, y = Freq, group = severity,
    shape = severity, color = severity
  )
) +
  geom_line() +
  geom_point(
    aes(text = paste(
      paste("Hour: ", hour, "<br>"),
      paste("Frequency: ", Freq, "<br>"),
      paste("Severity: ", severity)
    )),
    size = 2.5,
    data = day1234pc19
  ) +
  labs(title = "2019 Hourly Frequency of Severity") +
  theme(axis.text.x = element_text(size = 10)) +
  scale_x_discrete("Hour", breaks = labels, labels = labels) +
  scale_y_continuous("Frequency")
## Warning: Ignoring unknown aesthetics: text
day1234pc19gg2

##with animation but it won't be used
#day1234pc19gg22 <- ggplot(day1234pc19, 
#                         aes(x = hour, y = Freq, group = severity, 
#                             shape = severity, color = severity)) + 
#                  geom_line() + 
#                  geom_point(aes(text = 
#                                 paste(paste("Hour: ", hour, "<br>"),
#                                 paste("Frequency: ", Freq, "<br>"),
#                                 paste("Severity: ", severity))),
#                              size = 1, 
#                              data = day1234pc19
#                             ) +
#                  ggtitle("2019 Hourly Frequency of Severity") +
#                  theme(axis.text.x = element_text(size = 5)) +
#                  scale_x_discrete("Hour", breaks = labels, labels = labels) +
#                  scale_y_continuous("Frequency") +
#                  transition_reveal(as.numeric(hour))
#
#day1234pc19gg22
#
#day1234pc19gp22 <- ggplotly(day1234pc19gg2, tooltip =  "text")  %>% 
#                  config(displayModeBar = F)
#day1234pc19gp22
# City 2019: counts per city (rows) and Priority (columns), with the
# dimensions labelled when the table is built
data2pc19tb <- table(
  data2pc19$city, data2pc19$Priority,
  dnn = c("city", "Priority")
)
head(data2pc19tb)
##                      Priority
## city                     0    1    2    3    4
##   ASHTON-SANDY SPRING   34  180  113   16   51
##   BARNESVILLE            2   19   14    4    3
##   BEALLSVILLE            2   18   22    7    5
##   BETHESDA            1103 6634 3891  879 2944
##   BOYDS                164  477  272   82  165
##   BRINKLOW               8   49   26    2   12
# Merge the counts with data3popc (keeping unmatched rows from both sides),
# then express Freq as a percentage of Census.2010, rounded to one decimal
data2319tb <- merge(data2pc19tb, data3popc, all = TRUE)
data2319tb$rate <- with(
  data2319tb,
  round(Freq / Census.2010 * 100, digits = 1)
)
head(data2319tb)
##                  city Priority Freq Census.2010 rate
## 1 ASHTON-SANDY SPRING        0   34        5628  0.6
## 2 ASHTON-SANDY SPRING        3   16        5628  0.3
## 3 ASHTON-SANDY SPRING        2  113        5628  2.0
## 4 ASHTON-SANDY SPRING        4   51        5628  0.9
## 5 ASHTON-SANDY SPRING        1  180        5628  3.2
## 6         BARNESVILLE        0    2         172  1.2
# Total 2019 count per city: sum each row of the city x Priority table
data2pc19sum <- rowSums(data2pc19tb)
data2pc19sum
## ASHTON-SANDY SPRING         BARNESVILLE         BEALLSVILLE            BETHESDA 
##                 394                  42                  54               15451 
##               BOYDS            BRINKLOW          CABIN JOHN         CHEVY CHASE 
##                1160                  97                 225                5295 
##          CLARKSBURG            DAMASCUS             DERWOOD           DICKERSON 
##                3370                1917                3750                 367 
##        GAITHERSBURG        GARRETT PARK          GERMANTOWN           GLEN ECHO 
##               29562                 116               18236                  62 
##          KENSINGTON  MONTGOMERY VILLAGE               OLNEY         POOLESVILLE 
##                3669                6955                3485                 758 
##             POTOMAC           ROCKVILLE       SILVER SPRING        SPENCERVILLE 
##                7189               33719               69440                 126 
##    WASHINGTON GROVE 
##                  64
# Min, quartiles, median, mean and max of the per-city totals
summary(data2pc19sum)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      42     126    1917    8220    6955   69440
# Per-city totals as a data frame with columns Total and city; the city names
# also serve as row names. The frame is built directly: the previous
# `names(dimnames(df)) <- ...` step had no effect on a data frame, and the
# helper column had to be dropped by position afterwards.
data2pc19sumdf <- data.frame(
  Total = unname(data2pc19sum),
  city = names(data2pc19sum),
  row.names = names(data2pc19sum),
  stringsAsFactors = FALSE
)
head(data2pc19sumdf)
##                     Total                city
## ASHTON-SANDY SPRING   394 ASHTON-SANDY SPRING
## BARNESVILLE            42         BARNESVILLE
## BEALLSVILLE            54         BEALLSVILLE
## BETHESDA            15451            BETHESDA
## BOYDS                1160               BOYDS
## BRINKLOW               97            BRINKLOW
# Priority 2019: the same counts transposed, Priority in rows and city in
# columns, with the dimensions labelled when the table is built
data2pc19tb2 <- table(
  data2pc19$Priority, data2pc19$city,
  dnn = c("Priority", "city")
)
head(data2pc19tb2)
##         city
## Priority ASHTON-SANDY SPRING BARNESVILLE BEALLSVILLE BETHESDA BOYDS BRINKLOW
##        0                  34           2           2     1103   164        8
##        1                 180          19          18     6634   477       49
##        2                 113          14          22     3891   272       26
##        3                  16           4           7      879    82        2
##        4                  51           3           5     2944   165       12
##         city
## Priority CABIN JOHN CHEVY CHASE CLARKSBURG DAMASCUS DERWOOD DICKERSON
##        0         13         360        282      213     409        37
##        1        100        2385       1560      859    1560       129
##        2         40        1282        569      356     906       126
##        3         17         283        265      149     274        33
##        4         55         985        694      340     601        42
##         city
## Priority GAITHERSBURG GARRETT PARK GERMANTOWN GLEN ECHO KENSINGTON
##        0         2980            9       2076         6        291
##        1        12604           55       7990        19       1658
##        2         6571           19       3843        16        753
##        3         2389           12       1685         0        229
##        4         5018           21       2642        21        738
##         city
## Priority MONTGOMERY VILLAGE OLNEY POOLESVILLE POTOMAC ROCKVILLE SILVER SPRING
##        0                815   309          74     416      2738          7049
##        1               3129  1597         353    3909     14336         27293
##        2               1042   687         136    1329      6884         17809
##        3                731   253          62     367      2808          5008
##        4               1238   639         133    1168      6953         12281
##         city
## Priority SPENCERVILLE WASHINGTON GROVE
##        0           23                7
##        1           40               30
##        2           36                8
##        3            8                9
##        4           19               10
# Total 2019 count per Priority level: sum each row of the Priority x city
# table
data2pc19sum2 <- rowSums(data2pc19tb2)
data2pc19sum2
##     0     1     2     3     4 
## 19420 86983 46750 15572 36778
# Min, quartiles, median, mean and max of the per-Priority totals
summary(data2pc19sum2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15572   19420   36778   41101   46750   86983
# Per-severity totals as a data frame with columns Total and Severity; the
# severity labels also serve as row names. The frame is built directly: the
# previous code read `$data2pc19sum`, which only resolved to the real column
# `data2pc19sum2` through partial matching of `$`, and its
# `names(dimnames(df)) <- ...` step had no effect on a data frame.
data2pc19sumdf2 <- data.frame(
  Total = unname(data2pc19sum2),
  Severity = names(data2pc19sum2),
  row.names = names(data2pc19sum2),
  stringsAsFactors = FALSE
)
head(data2pc19sumdf2)
##   Total Severity
## 0 19420        0
## 1 86983        1
## 2 46750        2
## 3 15572        3
## 4 36778        4
# Convert the city x Priority table to a long data frame and keep only the
# Priority and Freq columns (the city column is not needed for the box plot)
data2pc19df <- as.data.frame(data2pc19tb)[c("Priority", "Freq")]
head(data2pc19df)
##   Priority Freq
## 1        0   34
## 2        0    2
## 3        0    2
## 4        0 1103
## 5        0  164
## 6        0    8
# Box plot of the 2019 per-city counts (Freq), one box per Priority level.
# Outliers are drawn as hollow black circles.
pcbox19 <- ggplot(
  data2pc19df,
  aes(x = Priority, y = Freq, fill = Priority)
) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 2) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.9),
    legend.position = "right"
  ) +
  labs(
    title = "2019 Crime Count of Severity",
    x = "Severity",
    y = "Count",
    fill = "Severity"
  )
pcbox19

# Interactive version without the plotly mode bar. FALSE is spelled out
# because `F` is an ordinary variable that can be reassigned.
pcbox19gg <- ggplotly(pcbox19) %>% config(displayModeBar = FALSE)
pcbox19gg
# Preview the table (with its rate column) used by the rate box plot
head(data2319tb, 10)
##                   city Priority Freq Census.2010 rate
## 1  ASHTON-SANDY SPRING        0   34        5628  0.6
## 2  ASHTON-SANDY SPRING        3   16        5628  0.3
## 3  ASHTON-SANDY SPRING        2  113        5628  2.0
## 4  ASHTON-SANDY SPRING        4   51        5628  0.9
## 5  ASHTON-SANDY SPRING        1  180        5628  3.2
## 6          BARNESVILLE        0    2         172  1.2
## 7          BARNESVILLE        2   14         172  8.1
## 8          BARNESVILLE        4    3         172  1.7
## 9          BARNESVILLE        1   19         172 11.0
## 10         BARNESVILLE        3    4         172  2.3
# Box plot of the 2019 per-city rate for each priority level, with outliers
# drawn as open black circles.
data23box19 <- ggplot(data2319tb, aes(x = Priority, y = rate, fill = Priority)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 2) +
  labs(
    title = "2019 Crime Rate by Severity",
    x = "Severity",
    y = "Rate",
    fill = "Severity"
  ) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.9),
    legend.position = "right"
  )
data23box19

# Interactive copy with the plotly mode bar hidden. FALSE is spelled out
# because the shorthand `F` is an ordinary, reassignable variable.
data23box19gg <- ggplotly(data23box19) %>%
  config(displayModeBar = FALSE)
data23box19gg

Getting some tables from a dataset - Median House Price

# Preview the wide house-price table: one row per city, one column per month.
head(data1hc, n = 6L)
##            city 2017-01 2017-02 2017-03 2017-04 2017-05 2017-06 2017-07 2017-08
## 1 SILVER SPRING  390620  392487  395550  396275  396810  397301  398409  399917
## 2    GERMANTOWN  291883  293267  295705  296331  296689  297053  298068  299153
## 3      BETHESDA  896903  898448  903592  904371  905178  905056  905350  905851
## 4  GAITHERSBURG  368529  370345  373394  374416  375105  375409  376512  377779
## 5     ROCKVILLE  501038  503002  506782  507513  508353  508382  509133  509784
## 6       POTOMAC  951291  953254  959322  960167  961027  960982  962343  964478
##   2017-09 2017-10 2017-11 2017-12 2018-01 2018-02 2018-03 2018-04 2018-05
## 1  400649  401799  403398  404867  403991  405137  406518  409937  411555
## 2  299483  299749  300672  301461  300785  301398  302329  304773  305909
## 3  904372  906099  908490  912808  911467  915507  918452  924509  925451
## 4  378432  378835  379742  380717  379476  379885  380437  383072  383925
## 5  509574  509880  510939  511967  510069  510892  512176  516266  517852
## 6  964886  966953  969420  971791  968505  970046  970918  976201  977724
##   2018-06 2018-07 2018-08 2018-09 2018-10 2018-11 2018-12 2019-01 2019-02
## 1  413016  414837  416193  417308  417280  417085  418167  421278  422691
## 2  306714  307552  308227  308794  308906  308767  309579  311669  312509
## 3  926120  928273  929363  930919  929842  929418  929906  935502  934825
## 4  384539  385236  385666  386160  386305  386470  387711  390667  391811
## 5  519130  520562  521655  522614  522285  521718  522561  526000  526753
## 6  978552  979232  978567  978776  976553  974526  975783  982006  983097
##   2019-03 2019-04 2019-05 2019-06 2019-07 2019-08 2019-09 2019-10 2019-11
## 1  422709  421560  422156  422926  423173  423568  424269  425641  427091
## 2  312226  311086  311325  311863  312191  312607  313284  314321  315430
## 3  932861  928090  928897  930650  931695  933655  935748  938883  941983
## 4  391679  390188  390592  391444  391945  392552  393065  393931  394714
## 5  525500  522429  521891  521992  521660  521549  521675  522626  523679
## 6  980440  975437  974412  974122  973131  972740  972136  972543  972689
##   2019-12
## 1  427807
## 2  316055
## 3  944549
## 4  394826
## 5  523907
## 6  970608
# Reshape the wide table (one column per month) into long form, one row per
# city and month. The id column is named through `id.vars`; the former
# `city.var` is not a melt() argument and was silently ignored, leaving
# melt() to guess the id column (and print "Using city as id variables").
data1hyr <- melt(data1hc,
                 id.vars = "city",
                 variable.name = "year")
# `year` holds "YYYY-MM" labels; append a day so each label parses as a Date.
data1hyr$date <- as.Date(paste0(data1hyr$year, "-01"))
head(data1hyr)
##            city    year  value       date
## 1 SILVER SPRING 2017-01 390620 2017-01-01
## 2    GERMANTOWN 2017-01 291883 2017-01-01
## 3      BETHESDA 2017-01 896903 2017-01-01
## 4  GAITHERSBURG 2017-01 368529 2017-01-01
## 5     ROCKVILLE 2017-01 501038 2017-01-01
## 6       POTOMAC 2017-01 951291 2017-01-01
#### Median House Price 2017
# Keep the monthly rows whose date falls in calendar year 2017.
data1ht17 <- data1hyr %>%
  filter(format(date, "%Y") == "2017")
head(data1ht17)
##            city    year  value       date
## 1 SILVER SPRING 2017-01 390620 2017-01-01
## 2    GERMANTOWN 2017-01 291883 2017-01-01
## 3      BETHESDA 2017-01 896903 2017-01-01
## 4  GAITHERSBURG 2017-01 368529 2017-01-01
## 5     ROCKVILLE 2017-01 501038 2017-01-01
## 6       POTOMAC 2017-01 951291 2017-01-01
# December 2017 housing prices only.
data1ht17dec <- data1hyr %>%
  filter(year == "2017-12")
head(data1ht17dec)
##            city    year  value       date
## 1 SILVER SPRING 2017-12 404867 2017-12-01
## 2    GERMANTOWN 2017-12 301461 2017-12-01
## 3      BETHESDA 2017-12 912808 2017-12-01
## 4  GAITHERSBURG 2017-12 380717 2017-12-01
## 5     ROCKVILLE 2017-12 511967 2017-12-01
## 6       POTOMAC 2017-12 971791 2017-12-01
# Line chart of each city's monthly median house value in 2017. ggplot2 does
# not use the `text` aesthetic itself (hence the "unknown aesthetics"
# warning); it carries the hover text for the ggplotly() version of the plot.
data1h17ln <- ggplot(data1ht17, aes(x = year, y = value, group = city)) +
  geom_line(aes(color = city)) +
  geom_point(
    aes(
      color = city,
      text = paste(paste("City: ", city, "<br>"),
                   paste("Year: ", year, "<br>"),
                   paste("Value: ", value))
    ),
    size = 2
  ) +
  labs(title = "Median House Value 2017", x = "Year", y = "Value(USD$Dollar)") +
  theme(
    axis.text.x.bottom = element_text(angle = 70, hjust = 0.9),
    legend.position = "bottom"
  )
## Warning: Ignoring unknown aesthetics: text
data1h17ln

# Interactive copy whose hover box shows only the custom `text` aesthetic.
# FALSE is spelled out because the shorthand `F` is a reassignable variable.
data1h17lngg <- ggplotly(data1h17ln, tooltip = "text") %>%
  config(displayModeBar = FALSE)
data1h17lngg
#### Median House Price 2018
# Keep the monthly rows whose date falls in calendar year 2018.
data1ht18 <- data1hyr %>%
  filter(format(date, "%Y") == "2018")
head(data1ht18)
##            city    year  value       date
## 1 SILVER SPRING 2018-01 403991 2018-01-01
## 2    GERMANTOWN 2018-01 300785 2018-01-01
## 3      BETHESDA 2018-01 911467 2018-01-01
## 4  GAITHERSBURG 2018-01 379476 2018-01-01
## 5     ROCKVILLE 2018-01 510069 2018-01-01
## 6       POTOMAC 2018-01 968505 2018-01-01
# December 2018 housing prices only.
data1ht18dec <- data1hyr %>%
  filter(year == "2018-12")
head(data1ht18dec)
##            city    year  value       date
## 1 SILVER SPRING 2018-12 418167 2018-12-01
## 2    GERMANTOWN 2018-12 309579 2018-12-01
## 3      BETHESDA 2018-12 929906 2018-12-01
## 4  GAITHERSBURG 2018-12 387711 2018-12-01
## 5     ROCKVILLE 2018-12 522561 2018-12-01
## 6       POTOMAC 2018-12 975783 2018-12-01
# Line chart of each city's monthly median house value in 2018. ggplot2 does
# not use the `text` aesthetic itself (hence the "unknown aesthetics"
# warning); it carries the hover text for the ggplotly() version of the plot.
data1h18ln <- ggplot(data1ht18, aes(x = year, y = value, group = city)) +
  geom_line(aes(color = city)) +
  geom_point(
    aes(
      color = city,
      text = paste(paste("City: ", city, "<br>"),
                   paste("Year: ", year, "<br>"),
                   paste("Value: ", value))
    ),
    size = 2
  ) +
  labs(title = "Median House Value 2018", x = "Year", y = "Value(USD$Dollar)") +
  theme(
    axis.text.x.bottom = element_text(angle = 70, hjust = 0.9),
    legend.position = "bottom"
  )
## Warning: Ignoring unknown aesthetics: text
data1h18ln

# Interactive copy whose hover box shows only the custom `text` aesthetic.
# FALSE is spelled out because the shorthand `F` is a reassignable variable.
data1h18lngg <- ggplotly(data1h18ln, tooltip = "text") %>%
  config(displayModeBar = FALSE)
data1h18lngg
#### Median House Price 2019
# Keep the monthly rows whose date falls in calendar year 2019.
data1ht19 <- data1hyr %>%
  filter(format(date, "%Y") == "2019")
head(data1ht19)
##            city    year  value       date
## 1 SILVER SPRING 2019-01 421278 2019-01-01
## 2    GERMANTOWN 2019-01 311669 2019-01-01
## 3      BETHESDA 2019-01 935502 2019-01-01
## 4  GAITHERSBURG 2019-01 390667 2019-01-01
## 5     ROCKVILLE 2019-01 526000 2019-01-01
## 6       POTOMAC 2019-01 982006 2019-01-01
# December 2019 housing prices only.
data1ht19dec <- data1hyr %>%
  filter(year == "2019-12")
head(data1ht19dec)
##            city    year  value       date
## 1 SILVER SPRING 2019-12 427807 2019-12-01
## 2    GERMANTOWN 2019-12 316055 2019-12-01
## 3      BETHESDA 2019-12 944549 2019-12-01
## 4  GAITHERSBURG 2019-12 394826 2019-12-01
## 5     ROCKVILLE 2019-12 523907 2019-12-01
## 6       POTOMAC 2019-12 970608 2019-12-01
# Line chart of each city's monthly median house value in 2019. ggplot2 does
# not use the `text` aesthetic itself (hence the "unknown aesthetics"
# warning); it carries the hover text for the ggplotly() version of the plot.
data1h19ln <- ggplot(data1ht19, aes(x = year, y = value, group = city)) +
  geom_line(aes(color = city)) +
  geom_point(
    aes(
      color = city,
      text = paste(paste("City: ", city, "<br>"),
                   paste("Year: ", year, "<br>"),
                   paste("Value: ", value))
    ),
    size = 2
  ) +
  labs(title = "Median House Value 2019", x = "Year", y = "Value(USD$Dollar)") +
  theme(
    axis.text.x.bottom = element_text(angle = 70, hjust = 0.9),
    legend.position = "bottom"
  )
## Warning: Ignoring unknown aesthetics: text
data1h19ln

# Interactive copy whose hover box shows only the custom `text` aesthetic.
# FALSE is spelled out because the shorthand `F` is a reassignable variable.
data1h19lngg <- ggplotly(data1h19ln, tooltip = "text") %>%
  config(displayModeBar = FALSE)
data1h19lngg

Merge the three datasets by city: city and the December value of each year from Housing; city and severity totals from Police; city and the 2010 census count from Population

#### Merge: 2017 Median House Price, Police Dispatched Incidents, and 2010 Population

# Full outer join of the December 2017 prices with the 2017 incident totals.
# The key is named explicitly instead of relying on merge()'s default of all
# shared column names.
df12mrg17 <- merge(data1ht17dec, data2pc17sumdf, by = "city", all = TRUE)
head(df12mrg17)
##                  city    year  value       date Total
## 1 ASHTON-SANDY SPRING 2017-12 632284 2017-12-01   295
## 2         BARNESVILLE 2017-12 473649 2017-12-01    28
## 3         BEALLSVILLE 2017-12 411979 2017-12-01    39
## 4            BETHESDA 2017-12 912808 2017-12-01 12982
## 5               BOYDS 2017-12 575839 2017-12-01   904
## 6            BRINKLOW 2017-12 671056 2017-12-01    61
# Attach the 2010 census population to every city (full outer join on city).
df123mrg17 <- merge(df12mrg17, data3popc, by = "city", all = TRUE)
head(df123mrg17)
##                  city    year  value       date Total Census.2010
## 1 ASHTON-SANDY SPRING 2017-12 632284 2017-12-01   295        5628
## 2         BARNESVILLE 2017-12 473649 2017-12-01    28         172
## 3         BEALLSVILLE 2017-12 411979 2017-12-01    39         214
## 4            BETHESDA 2017-12 912808 2017-12-01 12982       60858
## 5               BOYDS 2017-12 575839 2017-12-01   904       10460
## 6            BRINKLOW 2017-12 671056 2017-12-01    61         321
# Incident total per 100 residents (2010 census), rounded to one decimal.
df123mrg17$rate <- with(
  df123mrg17,
  round((Total / Census.2010) * 100, digits = 1)
)
head(df123mrg17)
##                  city    year  value       date Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 2017-12 632284 2017-12-01   295        5628  5.2
## 2         BARNESVILLE 2017-12 473649 2017-12-01    28         172 16.3
## 3         BEALLSVILLE 2017-12 411979 2017-12-01    39         214 18.2
## 4            BETHESDA 2017-12 912808 2017-12-01 12982       60858 21.3
## 5               BOYDS 2017-12 575839 2017-12-01   904       10460  8.6
## 6            BRINKLOW 2017-12 671056 2017-12-01    61         321 19.0
# Keep city, value and rate (dropping year, date, Total and Census.2010),
# then label each row with its city name.
df123mrg17c <- df123mrg17[c("city", "value", "rate")]
rownames(df123mrg17c) <- df123mrg17c[["city"]]
head(df123mrg17c)
##                                    city  value rate
## ASHTON-SANDY SPRING ASHTON-SANDY SPRING 632284  5.2
## BARNESVILLE                 BARNESVILLE 473649 16.3
## BEALLSVILLE                 BEALLSVILLE 411979 18.2
## BETHESDA                       BETHESDA 912808 21.3
## BOYDS                             BOYDS 575839  8.6
## BRINKLOW                       BRINKLOW 671056 19.0
# Numeric-only frame for the correlation: the city column is dropped because
# the city names are already the row names.
df123result17 <- df123mrg17c[c("value", "rate")]
head(df123result17)
##                      value rate
## ASHTON-SANDY SPRING 632284  5.2
## BARNESVILLE         473649 16.3
## BEALLSVILLE         411979 18.2
## BETHESDA            912808 21.3
## BOYDS               575839  8.6
## BRINKLOW            671056 19.0
# Correlation matrix between median house value and crime rate for 2017.
df123cor17 <- cor(df123result17)
df123cor17
##           value      rate
## value  1.000000 -0.152234
## rate  -0.152234  1.000000
# Draw the same matrix three times; only the glyph style differs.
# NOTE(review): no p-value matrix is passed, so `sig.level` / `insig`
# presumably have no effect here -- confirm.
invisible(lapply(c("circle", "pie", "number"), function(glyph) {
  corrplot(df123cor17, method = glyph,
           title = "2017 Correlation b/w Median House price and Rate of Crime",
           addCoef.col = "black", sig.level = 0.05, insig = "blank",
           mar = c(0, 0, 1, 0), type = "upper", order = "hclust",
           col = brewer.pal(n = 8, name = "RdYlBu"))
}))

#### Merge: 2018 Median House Price, Police Dispatched Incidents, and 2010 Population

# Full outer join of the December 2018 prices with the 2018 incident totals.
# The key is named explicitly instead of relying on merge()'s default of all
# shared column names.
df12mrg18 <- merge(data1ht18dec, data2pc18sumdf, by = "city", all = TRUE)
#df12mrg18 <- df12mrg18[-c(5)]
head(df12mrg18)
##                  city    year  value       date Total
## 1 ASHTON-SANDY SPRING 2018-12 639672 2018-12-01   354
## 2         BARNESVILLE 2018-12 490561 2018-12-01    37
## 3         BEALLSVILLE 2018-12 442491 2018-12-01    44
## 4            BETHESDA 2018-12 929906 2018-12-01 16554
## 5               BOYDS 2018-12 582696 2018-12-01  1211
## 6            BRINKLOW 2018-12 671896 2018-12-01    95
# Attach the 2010 census population to every city (full outer join on city).
df123mrg18 <- merge(df12mrg18, data3popc, by = "city", all = TRUE)
head(df123mrg18)
##                  city    year  value       date Total Census.2010
## 1 ASHTON-SANDY SPRING 2018-12 639672 2018-12-01   354        5628
## 2         BARNESVILLE 2018-12 490561 2018-12-01    37         172
## 3         BEALLSVILLE 2018-12 442491 2018-12-01    44         214
## 4            BETHESDA 2018-12 929906 2018-12-01 16554       60858
## 5               BOYDS 2018-12 582696 2018-12-01  1211       10460
## 6            BRINKLOW 2018-12 671896 2018-12-01    95         321
# Incident total per 100 residents (2010 census), rounded to one decimal.
df123mrg18$rate <- with(
  df123mrg18,
  round((Total / Census.2010) * 100, digits = 1)
)
head(df123mrg18)
##                  city    year  value       date Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 2018-12 639672 2018-12-01   354        5628  6.3
## 2         BARNESVILLE 2018-12 490561 2018-12-01    37         172 21.5
## 3         BEALLSVILLE 2018-12 442491 2018-12-01    44         214 20.6
## 4            BETHESDA 2018-12 929906 2018-12-01 16554       60858 27.2
## 5               BOYDS 2018-12 582696 2018-12-01  1211       10460 11.6
## 6            BRINKLOW 2018-12 671896 2018-12-01    95         321 29.6
# Keep city, value and rate (dropping year, date, Total and Census.2010),
# then label each row with its city name.
df123mrg18c <- df123mrg18[c("city", "value", "rate")]
rownames(df123mrg18c) <- df123mrg18c[["city"]]
head(df123mrg18c)
##                                    city  value rate
## ASHTON-SANDY SPRING ASHTON-SANDY SPRING 639672  6.3
## BARNESVILLE                 BARNESVILLE 490561 21.5
## BEALLSVILLE                 BEALLSVILLE 442491 20.6
## BETHESDA                       BETHESDA 929906 27.2
## BOYDS                             BOYDS 582696 11.6
## BRINKLOW                       BRINKLOW 671896 29.6
# Numeric-only frame for the correlation: the city column is dropped because
# the city names are already the row names.
df123result18 <- df123mrg18c[c("value", "rate")]
head(df123result18)
##                      value rate
## ASHTON-SANDY SPRING 639672  6.3
## BARNESVILLE         490561 21.5
## BEALLSVILLE         442491 20.6
## BETHESDA            929906 27.2
## BOYDS               582696 11.6
## BRINKLOW            671896 29.6
# Correlation matrix between median house value and crime rate for 2018.
df123cor18 <- cor(df123result18)
df123cor18
##            value       rate
## value  1.0000000 -0.1421164
## rate  -0.1421164  1.0000000
# Draw the same matrix three times; only the glyph style differs.
# NOTE(review): no p-value matrix is passed, so `sig.level` / `insig`
# presumably have no effect here -- confirm.
invisible(lapply(c("circle", "pie", "number"), function(glyph) {
  corrplot(df123cor18, method = glyph,
           title = "2018 Correlation b/w Median House price and Rate of Crime",
           addCoef.col = "black", sig.level = 0.05, insig = "blank",
           mar = c(0, 0, 1, 0), type = "upper", order = "hclust",
           col = brewer.pal(n = 8, name = "RdYlBu"))
}))

#### Merge: 2019 Median House Price, Police Dispatched Incidents, and 2010 Population

# Full outer join of the December 2019 prices with the 2019 incident totals.
# The key is named explicitly instead of relying on merge()'s default of all
# shared column names.
df12mrg19 <- merge(data1ht19dec, data2pc19sumdf, by = "city", all = TRUE)
#df12mrg19 <- df12mrg19[-c(5)]
head(df12mrg19)
##                  city    year  value       date Total
## 1 ASHTON-SANDY SPRING 2019-12 645068 2019-12-01   394
## 2         BARNESVILLE 2019-12 482206 2019-12-01    42
## 3         BEALLSVILLE 2019-12 433208 2019-12-01    54
## 4            BETHESDA 2019-12 944549 2019-12-01 15451
## 5               BOYDS 2019-12 589645 2019-12-01  1160
## 6            BRINKLOW 2019-12 678525 2019-12-01    97
# Attach the 2010 census population to every city (full outer join on city).
df123mrg19 <- merge(df12mrg19, data3popc, by = "city", all = TRUE)
head(df123mrg19)
##                  city    year  value       date Total Census.2010
## 1 ASHTON-SANDY SPRING 2019-12 645068 2019-12-01   394        5628
## 2         BARNESVILLE 2019-12 482206 2019-12-01    42         172
## 3         BEALLSVILLE 2019-12 433208 2019-12-01    54         214
## 4            BETHESDA 2019-12 944549 2019-12-01 15451       60858
## 5               BOYDS 2019-12 589645 2019-12-01  1160       10460
## 6            BRINKLOW 2019-12 678525 2019-12-01    97         321
# Incident total per 100 residents (2010 census), rounded to one decimal.
df123mrg19$rate <- with(
  df123mrg19,
  round((Total / Census.2010) * 100, digits = 1)
)
head(df123mrg19)
##                  city    year  value       date Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 2019-12 645068 2019-12-01   394        5628  7.0
## 2         BARNESVILLE 2019-12 482206 2019-12-01    42         172 24.4
## 3         BEALLSVILLE 2019-12 433208 2019-12-01    54         214 25.2
## 4            BETHESDA 2019-12 944549 2019-12-01 15451       60858 25.4
## 5               BOYDS 2019-12 589645 2019-12-01  1160       10460 11.1
## 6            BRINKLOW 2019-12 678525 2019-12-01    97         321 30.2
# Keep city, value and rate (dropping year, date, Total and Census.2010),
# then label each row with its city name.
df123mrg19c <- df123mrg19[c("city", "value", "rate")]
rownames(df123mrg19c) <- df123mrg19c[["city"]]
head(df123mrg19c)
##                                    city  value rate
## ASHTON-SANDY SPRING ASHTON-SANDY SPRING 645068  7.0
## BARNESVILLE                 BARNESVILLE 482206 24.4
## BEALLSVILLE                 BEALLSVILLE 433208 25.2
## BETHESDA                       BETHESDA 944549 25.4
## BOYDS                             BOYDS 589645 11.1
## BRINKLOW                       BRINKLOW 678525 30.2
# Numeric-only frame for the correlation: the city column is dropped because
# the city names are already the row names.
df123result19 <- df123mrg19c[c("value", "rate")]
head(df123result19)
##                      value rate
## ASHTON-SANDY SPRING 645068  7.0
## BARNESVILLE         482206 24.4
## BEALLSVILLE         433208 25.2
## BETHESDA            944549 25.4
## BOYDS               589645 11.1
## BRINKLOW            678525 30.2
# Correlation matrix between median house value and crime rate for 2019.
df123cor19 <- cor(df123result19)
df123cor19
##            value       rate
## value  1.0000000 -0.1328734
## rate  -0.1328734  1.0000000
# Draw the same matrix three times; only the glyph style differs. All three
# plots now share one title (the circle plot used to read "pricee").
# NOTE(review): no p-value matrix is passed, so `sig.level` / `insig`
# presumably have no effect here -- confirm.
invisible(lapply(c("circle", "pie", "number"), function(glyph) {
  corrplot(df123cor19, method = glyph,
           title = "2019 Correlation b/w Median House price and Rate of Crime",
           addCoef.col = "black", sig.level = 0.05, insig = "blank",
           mar = c(0, 0, 1, 0), type = "upper", order = "hclust",
           col = brewer.pal(n = 8, name = "RdYlBu"))
}))

combine all three years (2017, 2018, 2019)

# Stack the three yearly merged tables into a single long table.
all3yr <- rbind(df123mrg17, df123mrg18, df123mrg19)
head(all3yr)
##                  city    year  value       date Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 2017-12 632284 2017-12-01   295        5628  5.2
## 2         BARNESVILLE 2017-12 473649 2017-12-01    28         172 16.3
## 3         BEALLSVILLE 2017-12 411979 2017-12-01    39         214 18.2
## 4            BETHESDA 2017-12 912808 2017-12-01 12982       60858 21.3
## 5               BOYDS 2017-12 575839 2017-12-01   904       10460  8.6
## 6            BRINKLOW 2017-12 671056 2017-12-01    61         321 19.0
# Crime rate per city, one facet per year; point size follows the rate and
# point shape the year.
# NOTE(review): `labels` is not defined in this part of the file. If nothing
# earlier creates it, the name resolves to base::labels() -- confirm it is the
# intended set of city names.
all3yrgg <- ggplot(all3yr, aes(x = city, y = rate, group = city, shape = year)) +
  geom_point(aes(color = city, size = rate)) +
  # geom_text(aes(label = rate), vjust = -0.3, size = 4) +
  scale_x_discrete("City", breaks = labels, labels = labels) +
  scale_y_continuous("Crime rate(%=crime/population)") +
  facet_wrap(~ year) +
  ggtitle("Yearly Crime Rate") +
  theme(
    axis.text.x = element_text(size = 5),
    axis.text.x.bottom = element_text(angle = 70, hjust = 0.9)
  )
all3yrgg

# Balloon plot of the combined three-year table, filled by crime rate using
# the viridis "C" palette.
all3yrballoon <- ggballoonplot(all3yr, fill = "rate") +
  scale_fill_viridis_c(option = "C") +
  labs(title = "Yearly Crime Rate", x = "City", y = "Year")
all3yrballoon

# 2017: rows of the combined table stamped with December 2017.
all3yr17 <- all3yr %>%
  filter(year == "2017-12")
head(all3yr17)
##                  city    year  value       date Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 2017-12 632284 2017-12-01   295        5628  5.2
## 2         BARNESVILLE 2017-12 473649 2017-12-01    28         172 16.3
## 3         BEALLSVILLE 2017-12 411979 2017-12-01    39         214 18.2
## 4            BETHESDA 2017-12 912808 2017-12-01 12982       60858 21.3
## 5               BOYDS 2017-12 575839 2017-12-01   904       10460  8.6
## 6            BRINKLOW 2017-12 671056 2017-12-01    61         321 19.0
# 2017 crime rate per city: one point per city, sized by and labelled with
# its rate.
# NOTE(review): `labels` is not defined in this part of the file. If nothing
# earlier creates it, the name resolves to base::labels() -- confirm.
all3yr17gg <- ggplot(all3yr17, aes(x = city, y = rate, group = city)) +
  geom_point(aes(color = city, size = rate)) +
  geom_text(aes(label = rate), vjust = -0.3, size = 4) +
  scale_x_discrete("City", breaks = labels, labels = labels) +
  scale_y_continuous("Crime rate(%=crime/population)") +
  ggtitle("2017 Crime Rate by city") +
  theme(
    axis.text.x = element_text(size = 5, angle = 70, hjust = 0.9),
    axis.text.x.bottom = element_text(angle = 70, hjust = 0.9),
    legend.position = "right"
  )

all3yr17gg

# 2018: rows of the combined table stamped with December 2018.
all3yr18 <- all3yr %>%
  filter(year == "2018-12")
head(all3yr18)
##                  city    year  value       date Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 2018-12 639672 2018-12-01   354        5628  6.3
## 2         BARNESVILLE 2018-12 490561 2018-12-01    37         172 21.5
## 3         BEALLSVILLE 2018-12 442491 2018-12-01    44         214 20.6
## 4            BETHESDA 2018-12 929906 2018-12-01 16554       60858 27.2
## 5               BOYDS 2018-12 582696 2018-12-01  1211       10460 11.6
## 6            BRINKLOW 2018-12 671896 2018-12-01    95         321 29.6
# 2018 crime rate per city: one point per city, sized by and labelled with
# its rate.
# NOTE(review): `labels` is not defined in this part of the file. If nothing
# earlier creates it, the name resolves to base::labels() -- confirm.
all3yr18gg <- ggplot(all3yr18, aes(x = city, y = rate, group = city)) +
  geom_point(aes(color = city, size = rate)) +
  geom_text(aes(label = rate), vjust = -0.3, size = 4) +
  scale_x_discrete("City", breaks = labels, labels = labels) +
  scale_y_continuous("Crime rate(%=crime/population)") +
  ggtitle("2018 Crime Rate by city") +
  theme(
    axis.text.x = element_text(size = 5, angle = 70, hjust = 0.9),
    axis.text.x.bottom = element_text(angle = 70, hjust = 0.9),
    legend.position = "right"
  )

all3yr18gg

# 2019: rows of the combined table stamped with December 2019.
all3yr19 <- all3yr %>%
  filter(year == "2019-12")
head(all3yr19)
##                  city    year  value       date Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 2019-12 645068 2019-12-01   394        5628  7.0
## 2         BARNESVILLE 2019-12 482206 2019-12-01    42         172 24.4
## 3         BEALLSVILLE 2019-12 433208 2019-12-01    54         214 25.2
## 4            BETHESDA 2019-12 944549 2019-12-01 15451       60858 25.4
## 5               BOYDS 2019-12 589645 2019-12-01  1160       10460 11.1
## 6            BRINKLOW 2019-12 678525 2019-12-01    97         321 30.2
# 2019 crime rate per city: one point per city, sized by and labelled with
# its rate.
# NOTE(review): `labels` is not defined in this part of the file. If nothing
# earlier creates it, the name resolves to base::labels() -- confirm.
all3yr19gg <- ggplot(all3yr19, aes(x = city, y = rate, group = city)) +
  geom_point(aes(color = city, size = rate)) +
  geom_text(aes(label = rate), vjust = -0.3, size = 4) +
  scale_x_discrete("City", breaks = labels, labels = labels) +
  scale_y_continuous("Crime rate(%=crime/population)") +
  ggtitle("2019 Crime Rate by city") +
  theme(
    axis.text.x = element_text(size = 5, angle = 70, hjust = 0.9),
    axis.text.x.bottom = element_text(angle = 70, hjust = 0.9),
    legend.position = "right"
  )

all3yr19gg

#place multiple plots together
#gridExtra::grid.arrange(data1h17ln, data1h18ln, data1h19ln, nrow=3)  

Check which cities have the most severe types of crime over the 3 years

# Count the rows of data2pc for each city. Note that the count column is
# called `Priority` even though it holds a row count, not a priority level.
data2pcgr <- summarise(group_by(data2pc, city), Priority = n())
head(data2pcgr)
## # A tibble: 6 x 2
##   city                Priority
##   <chr>                  <int>
## 1 ASHTON-SANDY SPRING     1130
## 2 BARNESVILLE              113
## 3 BEALLSVILLE              147
## 4 BETHESDA               47781
## 5 BOYDS                   3456
## 6 BRINKLOW                 269
# Bar chart of the per-city counts with the value printed above each bar.
# The `text` aesthetic is unused by ggplot2 (hence the warning) and provides
# the hover text for the ggplotly() version.
data2pcbar <- ggplot(data2pcgr, aes(x = city, y = Priority)) +
  geom_bar(
    aes(text = paste(paste("City: ", city))),
    stat = "identity",
    fill = "#0073C2FF",
    size = 1
  ) +
  geom_text(aes(label = Priority), vjust = -0.1) +
  labs(
    title = "Total Severity of Police Dispatched Incidents from 2017 to 2019",
    subtitle = "per City in Montgomery County, MD",
    x = "City",
    y = "Severity"
  ) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.9))
## Warning: Ignoring unknown aesthetics: text
data2pcbar

# Interactive copy whose hover box shows only the custom `text` aesthetic.
# FALSE is spelled out because the shorthand `F` is a reassignable variable.
data2pcbargg <- ggplotly(data2pcbar, tooltip = "text") %>%
  config(displayModeBar = FALSE)
data2pcbargg
# Dodged bar chart of incident frequency per city, one bar per severity
# level. The `text` aesthetic is unused by ggplot2 (hence the warning) and
# provides the hover text for the ggplotly() version.
data2pcall <- ggplot(data2pcdf, aes(x = city, y = Freq, fill = Severity)) +
  geom_bar(
    aes(
      fill = Severity,
      # The severity line previously ended in "<br" (no closing ">"), which
      # left a broken HTML tag in the hover text.
      text = paste(paste("City: ", city, "<br>"),
                   paste("Severity: ", Severity, "<br>"),
                   paste("Frequency: ", Freq))
    ),
    size = 1,
    stat = "identity",
    position = position_dodge(0.9)
  ) +
  theme(axis.text.x.bottom = element_text(angle = 70, hjust = 0.9)) +
  ggtitle("Total Severity of Police Dispatched Incidents from 2017 to 2019",
          subtitle = "per city in Montgomery County, MD") +
  ylab("Count") +
  xlab("city")
## Warning: Ignoring unknown aesthetics: text
data2pcall

# Interactive copy restricted to the custom `text` hover content. The
# argument was previously misspelled `tootip`, so it was ignored and the
# tooltip restriction never took effect.
data2pcallgg <- ggplotly(data2pcall, tooltip = "text") %>%
  config(displayModeBar = FALSE)
data2pcallgg
# Grouped plotly bar chart: one trace per severity column (X0..X4) of
# data2pcdf2, plotted against city.
pcplotly <- plot_ly(data2pcdf2, x = ~city, y = ~X0, type = "bar", name = "0") %>%
  add_trace(y = ~X1, name = "1") %>%
  add_trace(y = ~X2, name = "2") %>%
  add_trace(y = ~X3, name = "3") %>%
  add_trace(y = ~X4, name = "4") %>%
  layout(
    yaxis = list(title = "Freq"),
    barmode = "group",
    title = "Total Severity of Police Dispatched Incidents from 2017 to 2019 \nper city in Montgomery County, MD"
  ) %>%
  # Annotation placed just right of the plot area to act as a legend title
  # (text was previously misspelled "Severty").
  add_annotations(
    text = "Severity",
    xref = "paper",
    yref = "paper",
    x = 1.02, xanchor = "left",
    y = 0.8, yanchor = "bottom",
    legendtitle = TRUE, showarrow = FALSE
  ) %>%
  # Start the legend at the same height so it sits under the annotation.
  layout(legend = list(y = 0.8, yanchor = "top")) %>%
  config(displayModeBar = FALSE)
pcplotly
# maybe Top6 and Low6 cities in Severity rates
# 2017: join the per-city totals with the city population table on "city".
# all.x = TRUE keeps every row of the totals table even when no population
# row matches.
data23mg17 <- merge(
  x = data2pc17sumdf,
  y = data3popc,
  by.x = "city",
  all.x = TRUE
)
head(data23mg17)
##                  city Total Census.2010
## 1 ASHTON-SANDY SPRING   295        5628
## 2         BARNESVILLE    28         172
## 3         BEALLSVILLE    39         214
## 4            BETHESDA 12982       60858
## 5               BOYDS   904       10460
## 6            BRINKLOW    61         321
# Rate: Total Severity / Population, expressed per 100 residents of the 2010
# census count and rounded to one decimal place.
data23mg17$rate <- round(
  (data23mg17[["Total"]] / data23mg17[["Census.2010"]]) * 100,
  digits = 1
)
head(data23mg17)
##                  city Total Census.2010 rate
## 1 ASHTON-SANDY SPRING   295        5628  5.2
## 2         BARNESVILLE    28         172 16.3
## 3         BEALLSVILLE    39         214 18.2
## 4            BETHESDA 12982       60858 21.3
## 5               BOYDS   904       10460  8.6
## 6            BRINKLOW    61         321 19.0
# Cities with the highest 2017 rate. Despite the "Top6" name, only the first
# five rows are kept. The second column (Total) is then dropped.
data2317Top6 <- data23mg17 %>%
  arrange(desc(rate)) %>%
  slice(seq_len(5))
data2317Top6 <- data2317Top6[-2]
head(data2317Top6)
##            city Census.2010  rate
## 1    KENSINGTON        2213 136.8
## 2       DERWOOD        2381 125.1
## 3 SILVER SPRING       71452  79.5
## 4     ROCKVILLE       61209  43.7
## 5  GAITHERSBURG       59933  40.9
# Interactive parallel-categories view of the highest-rate cities, without
# marginal histograms or hover info, and with the plotly mode bar hidden.
p1 <- alluvial_wide(as.data.frame(data2317Top6), max_variables = 4, bins = 5)
parcats(p1, marginal_histograms = FALSE, hoverinfo = "none") %>%
  config(displayModeBar = FALSE)
# Cities with the lowest 2017 rate. Despite the "Low6" name, only the first
# five rows are kept. The second column (Total) is then dropped.
data2317Low6 <- data23mg17 %>%
  arrange(rate) %>%
  slice(seq_len(5))
data2317Low6 <- data2317Low6[-2]
head(data2317Low6)
##                  city Census.2010 rate
## 1 ASHTON-SANDY SPRING        5628  5.2
## 2          CABIN JOHN        2280  7.4
## 3        SPENCERVILLE        1594  7.6
## 4               BOYDS       10460  8.6
## 5    WASHINGTON GROVE         555  9.0
# Interactive parallel-categories view of the lowest-rate cities, without
# marginal histograms or hover info, and with the plotly mode bar hidden.
p2 <- alluvial_wide(as.data.frame(data2317Low6), max_variables = 4, bins = 5)
parcats(p2, marginal_histograms = FALSE, hoverinfo = "none") %>%
  config(displayModeBar = FALSE)
head(data2pcdf)
##                  city Severity Freq
## 1 ASHTON-SANDY SPRING        0  104
## 2         BARNESVILLE        0    6
## 3         BEALLSVILLE        0   10
## 4            BETHESDA        0 3539
## 5               BOYDS        0  518
## 6            BRINKLOW        0   30
# Highest-frequency cities per severity level: sort all rows by Freq
# (descending), then keep the first five rows of each Severity group.
# The result is left grouped by Severity.
data2pcTop6 <- data2pcdf %>%
  arrange(desc(Freq)) %>%
  group_by(Severity) %>%
  slice(seq_len(5))
head(data2pcTop6)
## # A tibble: 6 x 3
## # Groups:   Severity [2]
##   city          Severity  Freq
##   <fct>         <fct>    <int>
## 1 SILVER SPRING 0        22187
## 2 GAITHERSBURG  0         9198
## 3 ROCKVILLE     0         8442
## 4 GERMANTOWN    0         6747
## 5 BETHESDA      0         3539
## 6 SILVER SPRING 1        83731
# Lowest-frequency cities per severity level: sort all rows by Freq
# (ascending), then keep the first five rows of each Severity group.
# The result is left grouped by Severity.
data2pcLow6 <- data2pcdf %>%
  arrange(Freq) %>%
  group_by(Severity) %>%
  slice(seq_len(5))
head(data2pcLow6)
## # A tibble: 6 x 3
## # Groups:   Severity [2]
##   city             Severity  Freq
##   <fct>            <fct>    <int>
## 1 BARNESVILLE      0            6
## 2 BEALLSVILLE      0           10
## 3 WASHINGTON GROVE 0           15
## 4 GARRETT PARK     0           19
## 5 GLEN ECHO        0           21
## 6 BEALLSVILLE      1           47
# Check that the top-cities table (columns 1-3 as axes) is in alluvia form
# before plotting it; silent = TRUE suppresses the diagnostic messages.
is_alluvia_form(as.data.frame(data2pcTop6), axes = 1:3, silent = TRUE)
## [1] TRUE
# Alluvial diagram of the highest-frequency cities per severity level.
# axis1 is city and axis2 is Severity, so the x-axis labels must be listed in
# that same order (they were previously swapped). The stray `alluvium = city`
# argument, which sat outside aes() and was never used, has been removed.
data2pcT6alluv <- ggplot(
  as.data.frame(data2pcTop6),
  aes(y = Freq, axis1 = city, axis2 = Severity)
) +
  geom_alluvium(
    aes(fill = city, colour = city),
    width = 1/4, alpha = 2/3, decreasing = NA
  ) +
  geom_stratum(width = 1/12, fill = "black", color = "grey") +
  geom_label(stat = "stratum", infer.label = TRUE) +
  scale_x_discrete(limits = c("City", "Severity"), expand = c(.05, .05)) +
  # Print plain numbers on the y axis instead of scientific notation
  scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
  scale_fill_brewer(type = "qual", palette = "Set1") +
  ggtitle("Top 5 Cities in Severity")
data2pcT6alluv

# Check that the lowest-cities table (columns 1-3 as axes) is in alluvia form
# before plotting it; silent = TRUE suppresses the diagnostic messages.
is_alluvia_form(as.data.frame(data2pcLow6), axes = 1:3, silent = TRUE)
## [1] TRUE
# Alluvial diagram of the lowest-frequency cities per severity level.
# data2pcLow6 keeps five cities per severity (slice(1:5)), so the title says
# "Low 5" to match the data and the companion "Top 5" plot. The stray
# `alluvium = city` argument, which sat outside aes() and was never used, has
# been removed.
data2pcL6alluv <- ggplot(
  as.data.frame(data2pcLow6),
  aes(y = Freq, axis1 = city, axis2 = Severity)
) +
  geom_alluvium(
    aes(fill = city, colour = city),
    width = 1/4, alpha = 2/3, decreasing = NA
  ) +
  geom_stratum(width = 1/12, fill = "black", color = "grey") +
  geom_label(stat = "stratum", infer.label = TRUE) +
  scale_x_discrete(limits = c("City", "Severity"), expand = c(.05, .05)) +
  # Print plain numbers on the y axis instead of scientific notation
  scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
  scale_fill_brewer(type = "qual", palette = "Set1") +
  ggtitle("Low 5 Cities in Severity")
data2pcL6alluv

# Interactive parallel-categories view of the top cities per severity level
# (marginal histograms switched off).
p <- alluvial_wide(
  as.data.frame(data2pcTop6),
  max_variables = 3,
  bins = 5
)
parcats(p, marginal_histograms = FALSE)

# Rate of crime by population

# 2018: join the per-city totals with the city population table on "city".
# all.x = TRUE keeps every row of the totals table even when no population
# row matches.
data23m18 <- merge(
  x = data2pc18sumdf,
  y = data3popc,
  by.x = "city",
  all.x = TRUE
)
head(data23m18)
##                  city Total Census.2010
## 1 ASHTON-SANDY SPRING   354        5628
## 2         BARNESVILLE    37         172
## 3         BEALLSVILLE    44         214
## 4            BETHESDA 16554       60858
## 5               BOYDS  1211       10460
## 6            BRINKLOW    95         321
# 2018 rate: Total per 100 residents of the 2010 census count, rounded to one
# decimal place.
data23m18$rate <- round(
  (data23m18[["Total"]] / data23m18[["Census.2010"]]) * 100,
  digits = 1
)
head(data23m18)
##                  city Total Census.2010 rate
## 1 ASHTON-SANDY SPRING   354        5628  6.3
## 2         BARNESVILLE    37         172 21.5
## 3         BEALLSVILLE    44         214 20.6
## 4            BETHESDA 16554       60858 27.2
## 5               BOYDS  1211       10460 11.6
## 6            BRINKLOW    95         321 29.6
# 2019: join the per-city totals with the city population table on "city".
# all.x = TRUE keeps every row of the totals table even when no population
# row matches.
data23m19 <- merge(
  x = data2pc19sumdf,
  y = data3popc,
  by.x = "city",
  all.x = TRUE
)
head(data23m19)
##                  city Total Census.2010
## 1 ASHTON-SANDY SPRING   394        5628
## 2         BARNESVILLE    42         172
## 3         BEALLSVILLE    54         214
## 4            BETHESDA 15451       60858
## 5               BOYDS  1160       10460
## 6            BRINKLOW    97         321
# 2019 rate: Total per 100 residents of the 2010 census count, rounded to one
# decimal place.
data23m19$rate <- round(
  (data23m19[["Total"]] / data23m19[["Census.2010"]]) * 100,
  digits = 1
)
head(data23m19)
##                  city Total Census.2010 rate
## 1 ASHTON-SANDY SPRING   394        5628  7.0
## 2         BARNESVILLE    42         172 24.4
## 3         BEALLSVILLE    54         214 25.2
## 4            BETHESDA 15451       60858 25.4
## 5               BOYDS  1160       10460 11.1
## 6            BRINKLOW    97         321 30.2

Check which cities have the least (amount of) severe types of crime

# Keep only the dispatched incidents whose Priority is "0"
data2pmsv0 <- data2pc %>%
  filter(Priority == "0")
head(data2pmsv0)
##           End.Time Priority                        Close.Type          city
## 1  9/14/2019 14:29        0   TRAFFIC/TRANSPORTATION INCIDENT    GERMANTOWN
## 2   4/2/2017 17:15        0 SUICIDAL PERSON/ATTEMPTED SUICIDE SILVER SPRING
## 3   3/21/2019 5:59        0                     URGENT ASSIST     ROCKVILLE
## 4 10/19/2018 19:48        0   TRAFFIC/TRANSPORTATION INCIDENT SILVER SPRING
## 5    4/3/2017 4:18        0 SUICIDAL PERSON/ATTEMPTED SUICIDE SILVER SPRING
## 6   4/3/2017 17:02        0         MENTAL DISORDER - VIA FRS      DAMASCUS
##   Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1                     5D                 99               118             347
## 2                     2D                 99               156             588
## 3                     4D                  0                44             461
## 4                     3D                103               153             335
## 5                     3D                131               191            2227
## 6                     5D                203               312             639
##   Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1             4296                 18             228           3949
## 2             3908                 56             431           3320
## 3            35782                 44             416          35321
## 4             3351                 50             181           3016
## 5             4588                 60            2035           2361
## 6             1159                109             327            520
##              datetime     time       date
## 1 2019-09-14 14:29:00 14:29:00 09/14/2019
## 2 2017-04-02 17:15:00 17:15:00 04/02/2017
## 3 2019-03-21 05:59:00 05:59:00 03/21/2019
## 4 2018-10-19 19:48:00 19:48:00 10/19/2018
## 5 2017-04-03 04:18:00 04:18:00 04/03/2017
## 6 2017-04-03 17:02:00 17:02:00 04/03/2017
# Number of priority-0 incidents per city; the count column is named Priority
data2pm0hist <- summarise(
  group_by(data2pmsv0, city),
  Priority = n()
)
head(data2pm0hist)
## # A tibble: 6 x 2
##   city                Priority
##   <chr>                  <int>
## 1 ASHTON-SANDY SPRING      104
## 2 BARNESVILLE                6
## 3 BEALLSVILLE               10
## 4 BETHESDA                3539
## 5 BOYDS                    518
## 6 BRINKLOW                  30
# Bar chart of the per-city priority-0 counts, with each count printed above
# its bar and the city labels rotated so they do not overlap.
# NOTE(review): this reuses the name data2pm0hist, replacing the summary table
# with the plot object, and the plot is not printed right after it is built -
# confirm that is intended.
data2pm0hist <- ggplot(data2pm0hist, aes(x = city, y = Priority)) +
  geom_bar(fill = "red", stat = "identity") +
  geom_text(aes(label = Priority), vjust = -0.3) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.9)) +
  labs(
    title = "Most Severity Rate (0) of Police Dispatched Incidents",
    subtitle = "per city in Montgomery County, MD",
    x = "city",
    y = "Priority(Severity)"
  )


# Bar chart of the 2010 census population per city, with each value printed
# above its bar and the city labels rotated so they do not overlap.
data3popbar <- ggplot(data3popc, aes(x = city, y = Census.2010)) +
  geom_bar(
    # `text` is not a ggplot2 aesthetic (ggplot2 warns that it is ignored); it
    # is mapped only so that ggplotly(tooltip = "text") below has hover text
    # to show. Without it the tooltip referenced an aesthetic that was never
    # defined.
    aes(text = paste(paste("City: ", city, "<br>"),
                     paste("Population 2010: ", Census.2010))),
    fill = "orange",
    stat = "identity"
  ) +
  geom_text(aes(label = Census.2010), vjust = -0.3, size = 4) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.9)) +
  theme(plot.title = element_text(hjust = 0, size = 10)) +
  ggtitle("Population of Cities in Montgomery County, MD",
          subtitle = "(Source: Census Bureau 2010)") +
  ylab("Count") +
  xlab("city")
data3popbar

# Interactive version: hover shows only the custom text; mode bar hidden.
data3popbargg <- ggplotly(data3popbar, tooltip = "text") %>%
  config(displayModeBar = FALSE)
data3popbargg
# Line-and-point chart of the 2010 census population per city: a gray
# vertical line from 0 up to the population, topped by a point coloured by
# city, with the value printed above it.
data3popln <- ggplot(data3popc, aes(x = city, y = Census.2010)) +
  geom_linerange(
    aes(x = city, ymin = 0, ymax = Census.2010),
    color = "gray",
    size = 1.5
  ) +
  geom_point(
    aes(
      color = city,
      # `text` is only consumed by ggplotly() as the hover label; ggplot2
      # itself warns that it is an unknown aesthetic.
      text = paste("City: ", city, "<br>", "Population 2010: ", Census.2010)
    ),
    size = 2
  ) +
  geom_text(aes(label = Census.2010), vjust = -0.6, size = 3) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.9)) +
  labs(
    title = "Population of Cities in Montgomery County, MD",
    subtitle = "(Source: Census Bureau 2010)",
    x = "city",
    y = "Count"
  )
## Warning: Ignoring unknown aesthetics: text
data3popln

# Interactive version of the population chart: hover shows only the custom
# `text` aesthetic mapped on the points; the plotly mode bar is hidden.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
data3poplngg <- ggplotly(data3popln, tooltip = "text") %>%
  config(displayModeBar = FALSE)
data3poplngg