# Data 1: median house prices (Zillow, city level, all homes).
# check.names = FALSE keeps the monthly "YYYY-MM" column names unmodified.
data1h <- read.csv(
  file = "C:/Users/illya/Desktop/DATA 205 Spring 2020/Datasets from Zillow/city_Zhvi_AllHomes.csv",
  check.names = FALSE
)

# Data 2: police dispatched incidents (dataMontgomery).
data2p <- read.csv(
  file = "C:/Users/illya/Desktop/DATA 205 Spring 2020/Datasets from dataMontgomery/Police_Dispatched_Incidents.csv"
)

# Data 3: population of Montgomery County places (Census Bureau).
data3pop <- read.csv(
  file = "C:/Users/illya/Desktop/DATA 205 Spring 2020/Datasets from Census Bureau/mcpopulation.csv"
)

# Inspect the structure of the house-price data.
str(data1h)
## 'data.frame': 27539 obs. of 293 variables:
## $ RegionID : int 6181 12447 39051 17426 6915 13271 40326 18959 54296 38128 ...
## $ RegionName: Factor w/ 17740 levels "Aaronsburg","Abbeville",..: 10709 8814 7060 2696 13581 11979 11991 8284 13589 3615 ...
## $ State : Factor w/ 51 levels "AK","AL","AR",..: 35 5 44 15 44 39 4 34 5 44 ...
## $ Metro : Factor w/ 861 levels "","Aberdeen",..: 547 453 356 152 682 606 607 429 683 194 ...
## $ CountyName: Factor w/ 1752 levels "Abbeville County",..: 1282 928 683 373 141 1219 964 326 1396 417 ...
## $ SizeRank : int 1 2 3 4 5 6 7 8 9 10 ...
## $ 1996-04 : int 194476 183563 94196 129731 93846 69890 99349 149168 191090 101565 ...
## $ 1996-05 : int 194292 183612 94351 129410 93783 69779 99943 149330 190767 101951 ...
## $ 1996-06 : int 194160 183708 94465 129528 93769 69678 100485 149353 190519 102204 ...
## $ 1996-07 : int 194088 183868 94512 129381 93768 69635 100997 149530 190255 102201 ...
## $ 1996-08 : int 193954 183832 94606 129759 93869 69563 101482 149923 189946 102364 ...
## $ 1996-09 : int 193912 183668 94824 130274 93989 69556 101952 150387 189663 102440 ...
## $ 1996-10 : int 193982 183342 95090 131018 94098 69550 102403 150807 189608 102791 ...
## $ 1996-11 : int 194136 183207 95407 131858 94234 69675 102864 150977 189603 102998 ...
## $ 1996-12 : int 194395 183082 95735 132434 94349 69819 103347 151331 189467 103335 ...
## $ 1997-01 : int 194783 182499 95675 133035 94236 69968 103834 151672 189186 103375 ...
## $ 1997-02 : int 195773 181932 95429 133566 94112 70057 104315 152214 189148 103552 ...
## $ 1997-03 : int 196821 181534 95216 134237 94025 70143 104748 152471 189422 103469 ...
## $ 1997-04 : int 197777 181714 95209 134351 94147 70241 105157 153007 189937 103533 ...
## $ 1997-05 : int 198234 181949 95230 135061 94234 70360 105580 153376 190359 103501 ...
## $ 1997-06 : int 198817 182174 95357 135104 94267 70462 106007 153752 190733 103692 ...
## $ 1997-07 : int 199507 182627 95564 135403 94264 70554 106409 153908 191458 104090 ...
## $ 1997-08 : int 200418 183536 95751 134511 94305 70572 106804 153841 192530 104365 ...
## $ 1997-09 : int 201334 184480 95818 133187 94355 70529 107267 153937 193991 104805 ...
## $ 1997-10 : int 202246 185628 95866 131296 94459 70455 107761 154075 195195 105082 ...
## $ 1997-11 : int 203408 186510 96005 129590 94513 70346 108240 154527 196645 105449 ...
## $ 1997-12 : int 204616 187846 96148 128761 94586 70216 108686 154891 198429 105696 ...
## $ 1998-01 : int 205901 189758 96785 128405 95104 70102 109215 155517 200814 106076 ...
## $ 1998-02 : int 206513 192031 97451 128349 95636 70080 109762 156095 203226 106443 ...
## $ 1998-03 : int 207017 194072 98163 128558 96156 70114 110288 156727 205397 106909 ...
## $ 1998-04 : int 207400 195489 98444 128914 96175 70118 110744 156986 207390 107005 ...
## $ 1998-05 : int 208200 197007 98742 129251 96347 70104 111222 157149 209408 107298 ...
## $ 1998-06 : int 209030 198384 98962 129622 96547 70133 111717 157643 211811 107453 ...
## $ 1998-07 : int 209813 199332 99196 130258 96833 70078 112344 158015 213916 107944 ...
## $ 1998-08 : int 210858 200495 99365 131871 96923 70289 113048 158389 216078 108277 ...
## $ 1998-09 : int 212042 202398 99553 134265 96995 70503 113714 158422 217979 108730 ...
## $ 1998-10 : int 213492 204595 99740 137388 97056 70856 114364 158714 220197 108666 ...
## $ 1998-11 : int 214692 206821 99684 140116 97165 70946 115044 159094 222170 108825 ...
## $ 1998-12 : int 215924 208454 99616 142155 97332 71098 115727 159297 224188 108811 ...
## $ 1999-01 : int 216990 210458 99669 143293 97334 71208 116195 159135 225865 109493 ...
## $ 1999-02 : int 218352 212274 100221 144283 97486 71391 116636 158941 227600 110245 ...
## $ 1999-03 : int 219798 214148 101040 145085 97697 71505 117160 158985 229220 111236 ...
## $ 1999-04 : int 221670 215837 101855 146033 98212 71677 117847 159280 231047 112192 ...
## $ 1999-05 : int 223666 217501 102571 146729 98884 71744 118581 159769 233155 112946 ...
## $ 1999-06 : int 225814 219870 103192 147993 99499 71861 119570 160398 235049 113807 ...
## $ 1999-07 : int 228303 222541 103809 149427 100051 72036 120489 161616 237383 114400 ...
## $ 1999-08 : int 230542 224337 104419 150603 100361 72154 121371 162667 239452 115003 ...
## $ 1999-09 : int 232806 225471 104877 151250 100698 72265 122112 163672 241510 115138 ...
## $ 1999-10 : int 234645 226607 105277 151677 101014 72276 122860 164263 243795 115776 ...
## $ 1999-11 : int 236424 228399 105699 152519 101387 72520 123523 164958 246429 116345 ...
## $ 1999-12 : int 238239 230173 106183 153542 101755 72714 124197 165627 249440 117184 ...
## $ 2000-01 : int 240213 231907 106603 155208 102026 73043 125080 166359 252731 117487 ...
## $ 2000-02 : int 242890 233428 106744 156900 102207 73349 126053 166938 255930 117993 ...
## $ 2000-03 : int 245938 235370 106688 158672 102276 73719 127004 167585 259241 118165 ...
## $ 2000-04 : int 248992 237384 106729 160246 102317 74059 127944 168355 262300 118383 ...
## $ 2000-05 : int 251844 239689 106746 161908 101673 74602 128880 169115 265451 118511 ...
## $ 2000-06 : int 254521 241675 106931 163457 101031 75079 129594 169741 269729 118897 ...
## $ 2000-07 : int 257230 243587 107119 164882 100387 75557 130333 170045 274298 119400 ...
## $ 2000-08 : int 259619 245877 107486 166289 100390 75834 131046 170501 279185 119893 ...
## $ 2000-09 : int 262125 248261 107874 167704 100388 76198 131938 171147 283707 120536 ...
## $ 2000-10 : int 264876 250356 108145 169134 100374 76624 132745 171918 287728 121080 ...
## $ 2000-11 : int 268108 252344 108653 170798 100299 77039 133558 172734 291906 121612 ...
## $ 2000-12 : int 270980 254471 109147 172528 100205 77485 134392 173516 295849 122126 ...
## $ 2001-01 : int 273717 256491 109399 174075 100102 77860 135224 174420 299659 122554 ...
## $ 2001-02 : int 276310 258626 109434 175352 99918 78226 136046 175532 304110 122222 ...
## $ 2001-03 : int 279798 260693 109109 176757 99691 78603 136845 176544 309071 122233 ...
## $ 2001-04 : int 283250 263180 108989 178333 99402 79055 137580 177444 314667 122351 ...
## $ 2001-05 : int 286337 265359 109011 180038 99418 79387 138313 178309 319181 122938 ...
## $ 2001-06 : int 288555 267692 109180 181611 99471 79674 139024 179279 322391 122787 ...
## $ 2001-07 : int 290646 269654 109320 182989 99585 79925 139708 180318 324329 122679 ...
## $ 2001-08 : int 292892 271819 109203 184440 99485 80336 140358 181385 326356 122757 ...
## $ 2001-09 : int 294933 273835 109314 185937 99345 80704 141031 182464 328856 123009 ...
## $ 2001-10 : int 297101 276547 109616 187583 99257 81040 141754 183503 331819 123201 ...
## $ 2001-11 : int 299652 279092 109878 189057 99131 81402 142574 184299 334966 123275 ...
## $ 2001-12 : int 302686 281955 110084 190259 99085 81806 143294 185223 338036 123400 ...
## $ 2002-01 : int 305858 284728 110256 191466 99058 82223 143947 186082 341873 123417 ...
## $ 2002-02 : int 307816 288005 110567 192779 99170 82604 144482 186914 345309 123799 ...
## $ 2002-03 : int 307467 291418 111097 194367 99431 83081 145114 187675 348674 123937 ...
## $ 2002-04 : int 306729 295210 111719 195568 99767 83539 145709 188600 351871 124326 ...
## $ 2002-05 : int 306738 298915 112270 196874 100153 84056 146265 189413 356040 124627 ...
## $ 2002-06 : int 308624 303068 112773 197933 100474 84509 146880 190081 361062 125441 ...
## $ 2002-07 : int 310849 307730 113229 199499 100777 85017 147489 190616 366930 126270 ...
## $ 2002-08 : int 313869 312648 113841 201032 101098 85470 148187 191348 373271 126970 ...
## $ 2002-09 : int 317123 317708 114334 202795 101418 86055 148730 192110 379093 127610 ...
## $ 2002-10 : int 320162 322922 114695 204260 101824 86794 149370 193341 385289 128115 ...
## $ 2002-11 : int 322241 328630 115013 205439 102384 87562 150050 194674 391111 128665 ...
## $ 2002-12 : int 324349 334233 115249 206840 102845 88259 150859 196154 399610 129182 ...
## $ 2003-01 : int 326472 339255 115622 208239 103005 88957 151705 197569 407765 129340 ...
## $ 2003-02 : int 329766 343755 115739 210001 102786 89817 152516 198984 415147 129409 ...
## $ 2003-03 : int 334245 348753 115791 211456 102659 90726 153239 199971 420816 129273 ...
## $ 2003-04 : int 339404 353121 115814 213137 102641 91688 153994 200837 425579 129296 ...
## $ 2003-05 : int 344113 358042 115894 214274 102922 92503 154694 201831 430663 129414 ...
## $ 2003-06 : int 348769 362118 116236 215579 103226 93350 155329 203050 434779 129651 ...
## $ 2003-07 : int 352307 367913 116408 216708 103652 94100 155746 204493 440473 129816 ...
## $ 2003-08 : int 355559 374026 116790 218225 104096 94892 156233 206342 445696 129994 ...
## $ 2003-09 : int 357704 380456 116961 219760 104427 95793 156997 209188 452030 130277 ...
## $ 2003-10 : int 360184 385695 117294 221534 104717 96754 157894 212628 457335 130818 ...
## $ 2003-11 : int 362922 392074 117539 223244 104761 97780 158756 216753 463578 131202 ...
## $ 2003-12 : int 365774 399205 117903 224947 105099 98724 159580 220570 464726 131508 ...
## [list output truncated]
# Convert the "Priority" column from integer to character so that it can be
# compared against string codes ("0" ... "4") later on.
data2p[["Priority"]] <- as.character(data2p[["Priority"]])
# Inspect the structure of the police-incident data.
str(data2p)
## 'data.frame': 642435 obs. of 26 variables:
## $ Incident_ID : Factor w/ 642435 levels "P1700500000",..: 1 2 4 5 540887 8 562621 11 12 537330 ...
## $ Crime.Reports : Factor w/ 99508 levels "","170500041",..: 1 1 1 1 1 1 1 2 1 1 ...
## $ Crash.Reports : Factor w/ 23951 levels "","170500026",..: 1 1 1 1 20089 1 1 1 1 1 ...
## $ Start.Time : Factor w/ 510889 levels "1/1/2018 0:01",..: 258428 258429 258392 258393 475684 258396 62887 258399 258400 509477 ...
## $ End.Time : Factor w/ 505708 levels "","1/1/2018 0:00",..: 256348 256349 256323 256313 470969 256314 62480 256320 256319 504286 ...
## $ Priority : chr "4" "1" "4" "2" ...
## $ Initial.Type : Factor w/ 177 levels "ABDUCTION (KIDNAPPING) - CUSTODIAL ABDUCTION, HOSTAGE SITUAT",..: 97 12 96 61 158 96 158 57 61 37 ...
## $ Close.Type : Factor w/ 176 levels "10 SIGNAL ALARM",..: 99 12 98 63 158 98 158 59 63 38 ...
## $ Address : Factor w/ 39448 levels "","1 BLK 16TH ST",..: 12811 21298 1328 5829 29612 1328 27238 15306 1 22607 ...
## $ City : Factor w/ 51 levels "","ADELPHI","ALEXANDRIA",..: 46 46 44 44 26 44 46 33 37 18 ...
## $ State : Factor w/ 6 levels "","DC","DCMD",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ Zip : Factor w/ 221 levels "","20001","20002",..: 118 123 89 92 104 89 120 114 110 101 ...
## $ Longitude : num -77 -77 -77.2 -77.1 -77.2 ...
## $ Latitude : num 39.1 39 39.1 39.1 39.2 ...
## $ Police.District.Number: Factor w/ 9 levels "1D","2D","3D",..: 4 3 1 4 5 1 3 2 6 5 ...
## $ Beat : Factor w/ 41 levels "","A1","A2","A3",..: 27 18 4 26 29 4 21 11 39 30 ...
## $ PRA : Factor w/ 785 levels "","0","1","10",..: 487 12 175 477 412 175 55 239 361 575 ...
## $ CallTime.CallRoute : int 45 451 201 98 99 57 92 195 141 0 ...
## $ Calltime.Dispatch : int 68 768 218 249 118 306 282 2882 451 77 ...
## $ Calltime.Arrive : int NA 1022 267 992 347 NA 627 3567 878 604 ...
## $ Calltime.Cleared : int 93 1236 18846 1278 4296 556 1801 6426 1393 628 ...
## $ CallRoute.Dispatch : int 22 316 17 150 18 248 189 2686 309 77 ...
## $ Dispatch.Arrive : int NA 253 48 743 228 NA 345 685 427 527 ...
## $ Arrive.Cleared : int NA 214 18579 286 3949 NA 1174 2859 515 24 ...
## $ Disposition.Desc : Factor w/ 459 levels "","ABANDONED LIVESTOCK",..: 336 44 336 424 188 226 188 409 259 336 ...
## $ Location : Factor w/ 36942 levels "","POINT (-76.8682 39.0762)",..: 8127 6227 21679 13952 31434 21679 1668 12081 27899 27795 ...
# Inspect the structure of the population data (columns and their types).
str(data3pop)
## 'data.frame': 30 obs. of 5 variables:
## $ City : Factor w/ 30 levels "ASHTON-SANDY SPRING",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ State : Factor w/ 1 level "MD": 1 1 1 1 1 1 1 1 1 1 ...
## $ County : Factor w/ 1 level "Montgomery County": 1 1 1 1 1 1 1 1 1 1 ...
## $ Census.2000: int 3437 161 115 15690 55277 NA 343 120 1734 16427 ...
## $ Census.2010: int 5628 172 214 16772 60858 10460 321 134 2280 16660 ...
# Keep RegionName, State, CountyName and the monthly columns 2017-01 through
# 2019-12; every other column (positions 1, 4, 6-255, 292, 293) is discarded.
data1h <- data1h[setdiff(seq_along(data1h), c(1, 4, 6:255, 292, 293))]
# Rows x columns after the selection.
dim(data1h)
## [1] 27539 39
# Preview the first rows.
head(data1h)
## RegionName State CountyName 2017-01 2017-02 2017-03 2017-04
## 1 New York NY Queens County 584554 587374 590257 592355
## 2 Los Angeles CA Los Angeles County 626685 629700 633282 635542
## 3 Houston TX Harris County 169343 169777 170364 171320
## 4 Chicago IL Cook County 229084 230065 231943 233286
## 5 San Antonio TX Bexar County 159974 161667 162749 163152
## 6 Philadelphia PA Philadelphia County 145257 145435 145489 146713
## 2017-05 2017-06 2017-07 2017-08 2017-09 2017-10 2017-11 2017-12 2018-01
## 1 595788 599291 603820 607738 611763 616309 619990 622861 625073
## 2 640115 643978 648564 652647 656640 660186 666312 672414 678260
## 3 171917 172378 172628 172850 173191 173616 174125 174551 175140
## 4 234326 234756 235202 236139 237387 238218 238754 239760 241448
## 5 163373 163943 164820 165871 166885 167955 168964 169341 169216
## 6 148192 149284 150178 150986 151974 153515 154850 156270 157617
## 2018-02 2018-03 2018-04 2018-05 2018-06 2018-07 2018-08 2018-09 2018-10
## 1 628962 632881 636989 640001 643186 645492 648308 651418 653854
## 2 683612 689579 693796 695838 697236 700288 702764 704865 706364
## 3 175609 176262 177303 178418 179290 180016 180912 181981 182788
## 4 242646 242589 242554 243004 243961 244435 244794 245413 246119
## 5 169119 170076 171252 172503 173370 174291 175125 175441 176028
## 6 159632 161964 163759 165033 165917 167216 168577 169961 170934
## 2018-11 2018-12 2019-01 2019-02 2019-03 2019-04 2019-05 2019-06 2019-07
## 1 656243 657655 658328 658200 658150 658181 658154 657709 657309
## 2 708192 708731 709587 706204 702990 700113 701035 702932 704227
## 3 183556 184265 185482 186383 186962 187018 187323 187833 188477
## 4 246857 247150 246646 246233 246214 246644 246922 246991 247258
## 5 176340 177668 178605 179998 180492 181318 181894 182653 183288
## 6 172218 173440 175027 176041 176695 177189 178084 179267 180296
## 2019-08 2019-09 2019-10 2019-11 2019-12
## 1 656607 655407 654263 653269 653086
## 2 706438 709487 713791 717913 724102
## 3 189086 189477 190036 190412 190829
## 4 247325 247065 246830 246605 246497
## 5 183869 184765 185365 186050 186132
## 6 181277 182176 182985 183584 184017
# Rename the Zillow region column to "city" so it lines up with the other
# data sets.
names(data1h)[names(data1h) == "RegionName"] <- "city"
# Restrict the house-price data to Montgomery County, Maryland.
data1h <- filter(data1h, State == "MD", CountyName == "Montgomery County")
# Relabel "Sandy Spring" as "Ashton-Sandy Spring". The pattern is anchored so
# that only an exact "Sandy Spring" is rewritten; an unanchored replacement
# would also rewrite any name that merely contains that text (and would turn
# an existing "Ashton-Sandy Spring" into "Ashton-Ashton-Sandy Spring").
data1h$city <- sub("^Sandy Spring$", "Ashton-Sandy Spring", data1h$city)
# Preview the first rows.
head(data1h)
## city State CountyName 2017-01 2017-02 2017-03 2017-04
## 1 Silver Spring MD Montgomery County 390620 392487 395550 396275
## 2 Germantown MD Montgomery County 291883 293267 295705 296331
## 3 Bethesda MD Montgomery County 896903 898448 903592 904371
## 4 Gaithersburg MD Montgomery County 368529 370345 373394 374416
## 5 Rockville MD Montgomery County 501038 503002 506782 507513
## 6 North Bethesda MD Montgomery County 499352 500774 504089 504867
## 2017-05 2017-06 2017-07 2017-08 2017-09 2017-10 2017-11 2017-12 2018-01
## 1 396810 397301 398409 399917 400649 401799 403398 404867 403991
## 2 296689 297053 298068 299153 299483 299749 300672 301461 300785
## 3 905178 905056 905350 905851 904372 906099 908490 912808 911467
## 4 375105 375409 376512 377779 378432 378835 379742 380717 379476
## 5 508353 508382 509133 509784 509574 509880 510939 511967 510069
## 6 505304 505726 506854 507860 507751 508018 509023 510024 508005
## 2018-02 2018-03 2018-04 2018-05 2018-06 2018-07 2018-08 2018-09 2018-10
## 1 405137 406518 409937 411555 413016 414837 416193 417308 417280
## 2 301398 302329 304773 305909 306714 307552 308227 308794 308906
## 3 915507 918452 924509 925451 926120 928273 929363 930919 929842
## 4 379885 380437 383072 383925 384539 385236 385666 386160 386305
## 5 510892 512176 516266 517852 519130 520562 521655 522614 522285
## 6 508707 509643 513096 514447 515259 516275 516882 517742 517646
## 2018-11 2018-12 2019-01 2019-02 2019-03 2019-04 2019-05 2019-06 2019-07
## 1 417085 418167 421278 422691 422709 421560 422156 422926 423173
## 2 308767 309579 311669 312509 312226 311086 311325 311863 312191
## 3 929418 929906 935502 934825 932861 928090 928897 930650 931695
## 4 386470 387711 390667 391811 391679 390188 390592 391444 391945
## 5 521718 522561 526000 526753 525500 522429 521891 521992 521660
## 6 517227 518516 522371 523718 523059 520663 520787 521513 521859
## 2019-08 2019-09 2019-10 2019-11 2019-12
## 1 423568 424269 425641 427091 427807
## 2 312607 313284 314321 315430 316055
## 3 933655 935748 938883 941983 944549
## 4 392552 393065 393931 394714 394826
## 5 521549 521675 522626 523679 523907
## 6 522488 523046 524240 525592 526032
# Keep only the Montgomery County places used in this analysis.
data1hc <- filter(
  data1h,
  city %in% c(
    "Ashton-Sandy Spring", "Barnesville", "Beallsville", "Bethesda", "Boyds",
    "Brinklow", "Brookeville", "Cabin John", "Chevy Chase", "Clarksburg",
    "Damascus", "Derwood", "Dickerson", "Gaithersburg", "Garrett Park",
    "Germantown", "Glen Echo", "Kensington", "Montgomery Village", "Olney",
    "Poolesville", "Potomac", "Rockville", "Silver Spring", "Spencerville",
    "Washington Grove"
  )
)
# Drop Brookeville: it is treated as an outlier because of its very large
# values.
data1hc <- filter(data1hc, city != "Brookeville")
# Upper-case the city names (the first column), matching the upper-case
# spelling used for cities elsewhere in this script.
data1hc$city <- toupper(data1hc$city)
# Remove columns 2 and 3 (State and CountyName), now constant.
data1hc <- data1hc[-c(2, 3)]
# Preview the first rows.
head(data1hc)
## city 2017-01 2017-02 2017-03 2017-04 2017-05 2017-06 2017-07 2017-08
## 1 SILVER SPRING 390620 392487 395550 396275 396810 397301 398409 399917
## 2 GERMANTOWN 291883 293267 295705 296331 296689 297053 298068 299153
## 3 BETHESDA 896903 898448 903592 904371 905178 905056 905350 905851
## 4 GAITHERSBURG 368529 370345 373394 374416 375105 375409 376512 377779
## 5 ROCKVILLE 501038 503002 506782 507513 508353 508382 509133 509784
## 6 POTOMAC 951291 953254 959322 960167 961027 960982 962343 964478
## 2017-09 2017-10 2017-11 2017-12 2018-01 2018-02 2018-03 2018-04 2018-05
## 1 400649 401799 403398 404867 403991 405137 406518 409937 411555
## 2 299483 299749 300672 301461 300785 301398 302329 304773 305909
## 3 904372 906099 908490 912808 911467 915507 918452 924509 925451
## 4 378432 378835 379742 380717 379476 379885 380437 383072 383925
## 5 509574 509880 510939 511967 510069 510892 512176 516266 517852
## 6 964886 966953 969420 971791 968505 970046 970918 976201 977724
## 2018-06 2018-07 2018-08 2018-09 2018-10 2018-11 2018-12 2019-01 2019-02
## 1 413016 414837 416193 417308 417280 417085 418167 421278 422691
## 2 306714 307552 308227 308794 308906 308767 309579 311669 312509
## 3 926120 928273 929363 930919 929842 929418 929906 935502 934825
## 4 384539 385236 385666 386160 386305 386470 387711 390667 391811
## 5 519130 520562 521655 522614 522285 521718 522561 526000 526753
## 6 978552 979232 978567 978776 976553 974526 975783 982006 983097
## 2019-03 2019-04 2019-05 2019-06 2019-07 2019-08 2019-09 2019-10 2019-11
## 1 422709 421560 422156 422926 423173 423568 424269 425641 427091
## 2 312226 311086 311325 311863 312191 312607 313284 314321 315430
## 3 932861 928090 928897 930650 931695 933655 935748 938883 941983
## 4 391679 390188 390592 391444 391945 392552 393065 393931 394714
## 5 525500 522429 521891 521992 521660 521549 521675 522626 523679
## 6 980440 975437 974412 974122 973131 972740 972136 972543 972689
## 2019-12
## 1 427807
## 2 316055
## 3 944549
## 4 394826
## 5 523907
## 6 970608
# Rename "City" to "city" so the column name matches the other data sets.
names(data2p)[names(data2p) == "City"] <- "city"
# Drop incidents with a missing or empty city.
data2p1 <- data2p[!(is.na(data2p$city) | data2p$city == ""), ]
# Merge "ASHTON" and "SANDY SPRING" into the single label
# "ASHTON-SANDY SPRING" in ONE anchored pass. Two successive unanchored
# replacements do not work: the second one ("ASHTON") re-matches the result of
# the first, turning "SANDY SPRING" into "ASHTON-SANDY SPRING-SANDY SPRING",
# which the later city filter then silently discards.
# NOTE: recorded output further down was produced before this fix; re-run the
# script to refresh the counts for ASHTON-SANDY SPRING.
data2p1$city <- sub(
  "^(ASHTON|SANDY SPRING)$",
  "ASHTON-SANDY SPRING",
  data2p1$city
)
# Preview the first rows.
head(data2p1)
## Incident_ID Crime.Reports Crash.Reports Start.Time End.Time
## 1 P1700500000 4/2/2017 6:38 4/2/2017 6:40
## 2 P1700500036 4/2/2017 8:31 4/2/2017 8:52
## 3 P1700500127 4/2/2017 10:49 4/2/2017 16:03
## 4 P1700500146 4/2/2017 11:13 4/2/2017 11:34
## 5 P1900305679 190044039 9/14/2019 13:17 9/14/2019 14:29
## 6 P1700500197 4/2/2017 12:03 4/2/2017 12:13
## Priority Initial.Type
## 1 4 MISC-ADMIN (DOCUMENT, LOST OR FOUND PROPERTY, MESSAGES,
## 2 1 ALARMB - ALARM BURGLARY/INTRUSION
## 3 4 MIS - MISC ON PATROL
## 4 2 DISTURBANCE/NUISANCE
## 5 0 TRAFFIC/TRANSPORTATION INCIDENT
## 6 4 MIS - MISC ON PATROL
## Close.Type
## 1 MISC-ADMIN (DOCUMENT, LOST OR FOUND PROPERTY, MESSAGES,
## 2 ALARMB - ALARM BURGLARY/INTRUSION
## 3 MIS - MISC ON PATROL
## 4 DISTURBANCE/NUISANCE
## 5 TRAFFIC/TRANSPORTATION INCIDENT
## 6 MIS - MISC ON PATROL
## Address city State Zip Longitude Latitude
## 1 2300 BLK RANDOLPH RD SILVER SPRING MD 20902 -77.04840 39.05842
## 2 8900 BLK FAIRVIEW RD SILVER SPRING MD 20910 -77.02930 39.00069
## 3 1 BLK W MONTGOMERY AVE ROCKVILLE MD 20850 -77.15290 39.08423
## 4 13000 BLK PARKLAND DR ROCKVILLE MD 20853 -77.08880 39.06914
## 5 GUNNERS BRANCH RD / FREDERICK RD GERMANTOWN MD 20876 -77.23811 39.17546
## 6 1 BLK W MONTGOMERY AVE ROCKVILLE MD 20850 -77.15290 39.08423
## Police.District.Number Beat PRA CallTime.CallRoute Calltime.Dispatch
## 1 4D L1 536 45 68
## 2 3D G2 107 451 768
## 3 1D A3 255 201 218
## 4 4D K2 527 98 249
## 5 5D M1 469 99 118
## 6 1D A3 255 57 306
## Calltime.Arrive Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive
## 1 NA 93 22 NA
## 2 1022 1236 316 253
## 3 267 18846 17 48
## 4 992 1278 150 743
## 5 347 4296 18 228
## 6 NA 556 248 NA
## Arrive.Cleared Disposition.Desc Location
## 1 NA OTHERMISCELLANEOUS POINT (-77.0484 39.0584)
## 2 214 ALARMRESID-UNKNOWNCAUS POINT (-77.0293 39.0007)
## 3 18579 OTHERMISCELLANEOUS POINT (-77.1529 39.0842)
## 4 286 SUSPICIOUSSIT/PRSON/VEH POINT (-77.0888 39.0691)
## 5 3949 COLOTH-DAMAG-ROAD-COLLI POINT (-77.2381 39.1755)
## 6 NA DISORDERLYCONDUCT POINT (-77.1529 39.0842)
# Keep only incidents located in the Montgomery County places used in this
# analysis (upper-case spelling, as in the police data).
data2p2 <- filter(
  data2p1,
  city %in% c(
    "ASHTON-SANDY SPRING", "BARNESVILLE", "BEALLSVILLE", "BETHESDA", "BOYDS",
    "BRINKLOW", "BROOKEVILLE", "CABIN JOHN", "CHEVY CHASE", "CLARKSBURG",
    "DAMASCUS", "DERWOOD", "DICKERSON", "GAITHERSBURG", "GARRETT PARK",
    "GERMANTOWN", "GLEN ECHO", "KENSINGTON", "MONTGOMERY VILLAGE", "OLNEY",
    "POOLESVILLE", "POTOMAC", "ROCKVILLE", "SILVER SPRING", "SPENCERVILLE",
    "WASHINGTON GROVE"
  )
)
# Rows x columns after the city filter.
dim(data2p2)
## [1] 630566 26
# Drop the columns not needed downstream (by position): Incident_ID,
# Crime.Reports, Crash.Reports, Start.Time, Initial.Type, Address, Longitude,
# Latitude, Beat, PRA, Disposition.Desc and Location.
data2pcf <- data2p2[-c(1:4, 7, 9, 13, 14, 16:17, 25, 26)]
dim(data2pcf)
## [1] 630566 14
# Keep only incidents assigned to police districts 1D-6D.
data2pcf <- filter(
  data2pcf,
  Police.District.Number %in% c("1D", "2D", "3D", "4D", "5D", "6D")
)
dim(data2pcf)
## [1] 630502 14
# Keep only incidents whose priority code is 0-4 (Priority is character).
data2pcf <- filter(data2pcf, Priority %in% c("0", "1", "2", "3", "4"))
# Preview the first rows.
head(data2pcf)
## End.Time Priority
## 1 4/2/2017 6:40 4
## 2 4/2/2017 8:52 1
## 3 4/2/2017 16:03 4
## 4 4/2/2017 11:34 2
## 5 9/14/2019 14:29 0
## 6 4/2/2017 12:13 4
## Close.Type city State
## 1 MISC-ADMIN (DOCUMENT, LOST OR FOUND PROPERTY, MESSAGES, SILVER SPRING MD
## 2 ALARMB - ALARM BURGLARY/INTRUSION SILVER SPRING MD
## 3 MIS - MISC ON PATROL ROCKVILLE MD
## 4 DISTURBANCE/NUISANCE ROCKVILLE MD
## 5 TRAFFIC/TRANSPORTATION INCIDENT GERMANTOWN MD
## 6 MIS - MISC ON PATROL ROCKVILLE MD
## Zip Police.District.Number CallTime.CallRoute Calltime.Dispatch
## 1 20902 4D 45 68
## 2 20910 3D 451 768
## 3 20850 1D 201 218
## 4 20853 4D 98 249
## 5 20876 5D 99 118
## 6 20850 1D 57 306
## Calltime.Arrive Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive
## 1 NA 93 22 NA
## 2 1022 1236 316 253
## 3 267 18846 17 48
## 4 992 1278 150 743
## 5 347 4296 18 228
## 6 NA 556 248 NA
## Arrive.Cleared
## 1 NA
## 2 214
## 3 18579
## 4 286
## 5 3949
## 6 NA
# Split date and time.
# Parse End.Time (month/day/year hour:minute) into a POSIXct timestamp. No
# time zone is passed, so the session's local time zone applies.
data2pcf[["datetime"]] <- as.POSIXct(
  data2pcf[["End.Time"]],
  format = "%m/%d/%Y %H:%M"
)
# Derive separate clock-time (HH:MM:SS) and calendar-date (mm/dd/YYYY) text
# columns from the timestamp.
data2pc <- transform(
  data2pcf,
  time = format(datetime, "%T"),
  date = format(datetime, "%m/%d/%Y")
)
# Drop Brookeville: it is treated as an outlier because of its very large
# values.
data2pc <- filter(data2pc, city != "BROOKEVILLE")
# Remove columns 5 and 6 (State and Zip).
data2pc <- data2pc[-(5:6)]
# Preview the first rows.
head(data2pc)
## End.Time Priority
## 1 4/2/2017 6:40 4
## 2 4/2/2017 8:52 1
## 3 4/2/2017 16:03 4
## 4 4/2/2017 11:34 2
## 5 9/14/2019 14:29 0
## 6 4/2/2017 12:13 4
## Close.Type city
## 1 MISC-ADMIN (DOCUMENT, LOST OR FOUND PROPERTY, MESSAGES, SILVER SPRING
## 2 ALARMB - ALARM BURGLARY/INTRUSION SILVER SPRING
## 3 MIS - MISC ON PATROL ROCKVILLE
## 4 DISTURBANCE/NUISANCE ROCKVILLE
## 5 TRAFFIC/TRANSPORTATION INCIDENT GERMANTOWN
## 6 MIS - MISC ON PATROL ROCKVILLE
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 4D 45 68 NA
## 2 3D 451 768 1022
## 3 1D 201 218 267
## 4 4D 98 249 992
## 5 5D 99 118 347
## 6 1D 57 306 NA
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 93 22 NA NA
## 2 1236 316 253 214
## 3 18846 17 48 18579
## 4 1278 150 743 286
## 5 4296 18 228 3949
## 6 556 248 NA NA
## datetime time date
## 1 2017-04-02 06:40:00 06:40:00 04/02/2017
## 2 2017-04-02 08:52:00 08:52:00 04/02/2017
## 3 2017-04-02 16:03:00 16:03:00 04/02/2017
## 4 2017-04-02 11:34:00 11:34:00 04/02/2017
## 5 2019-09-14 14:29:00 14:29:00 09/14/2019
## 6 2017-04-02 12:13:00 12:13:00 04/02/2017
# Rename "City" to "city" so the column name matches the other data sets.
names(data3pop)[names(data3pop) == "City"] <- "city"
# Keep only the Montgomery County places used in this analysis.
data3popc <- filter(
  data3pop,
  city %in% c(
    "ASHTON-SANDY SPRING", "BARNESVILLE", "BEALLSVILLE", "BETHESDA", "BOYDS",
    "BRINKLOW", "BROOKEVILLE", "CABIN JOHN", "CHEVY CHASE", "CLARKSBURG",
    "DAMASCUS", "DERWOOD", "DICKERSON", "GAITHERSBURG", "GARRETT PARK",
    "GERMANTOWN", "GLEN ECHO", "KENSINGTON", "MONTGOMERY VILLAGE", "OLNEY",
    "POOLESVILLE", "POTOMAC", "ROCKVILLE", "SILVER SPRING", "SPENCERVILLE",
    "WASHINGTON GROVE"
  )
)
# Drop Brookeville: it is treated as an outlier because of its very large
# values.
data3popc <- filter(data3popc, city != "BROOKEVILLE")
# Remove columns 2-4 (State, County, Census.2000), leaving city and
# Census.2010.
data3popc <- data3popc[-c(2:4)]
# Preview the first rows.
head(data3popc)
## city Census.2010
## 1 ASHTON-SANDY SPRING 5628
## 2 BARNESVILLE 172
## 3 BEALLSVILLE 214
## 4 BETHESDA 60858
## 5 BOYDS 10460
## 6 BRINKLOW 321
# Contingency table of incident counts: one row per city, one column per
# priority code. Naming the arguments labels the two dimensions directly.
data2pctable <- table(city = data2pc$city, Severity = data2pc$Priority)
data2pctable
## Severity
## city 0 1 2 3 4
## ASHTON-SANDY SPRING 104 515 294 67 150
## BARNESVILLE 6 48 35 11 13
## BEALLSVILLE 10 47 55 13 22
## BETHESDA 3539 21062 11918 2595 8667
## BOYDS 518 1420 755 233 530
## BRINKLOW 30 119 71 15 34
## CABIN JOHN 54 298 114 62 153
## CHEVY CHASE 1087 7124 3759 760 2962
## CLARKSBURG 822 4625 1756 713 1933
## DAMASCUS 666 2557 1127 527 892
## DERWOOD 1232 4538 2930 1024 1698
## DICKERSON 102 424 363 120 131
## GAITHERSBURG 9198 37908 21066 6732 14960
## GARRETT PARK 19 166 60 27 62
## GERMANTOWN 6747 24962 13491 4646 8741
## GLEN ECHO 21 69 44 4 40
## KENSINGTON 930 5046 2258 743 2010
## MONTGOMERY VILLAGE 2551 9397 3602 1934 3644
## OLNEY 950 5048 2200 888 1915
## POOLESVILLE 225 1087 435 206 360
## POTOMAC 1424 11467 4100 1224 3347
## ROCKVILLE 8442 43076 21999 7710 20005
## SILVER SPRING 22187 83731 56328 14187 35425
## SPENCERVILLE 53 159 101 31 54
## WASHINGTON GROVE 15 92 29 22 37
# Convert the contingency table to a long-format data frame with one row per
# (city, Severity) pair and the count in "Freq".
data2pcdf <- as.data.frame(data2pctable, responseName = "Freq")
head(data2pcdf)
## city Severity Freq
## 1 ASHTON-SANDY SPRING 0 104
## 2 BARNESVILLE 0 6
## 3 BEALLSVILLE 0 10
## 4 BETHESDA 0 3539
## 5 BOYDS 0 518
## 6 BRINKLOW 0 30
# Wide-format counts: one row per city and one column per priority code
# (X0 ... X4). The city is also stored as a regular column, copied from the
# row names.
data2pcdf2 <- data.frame(unclass(with(data2pc, table(city, Priority))))
data2pcdf2$city <- rownames(data2pcdf2)
head(data2pcdf2)
## X0 X1 X2 X3 X4 city
## ASHTON-SANDY SPRING 104 515 294 67 150 ASHTON-SANDY SPRING
## BARNESVILLE 6 48 35 11 13 BARNESVILLE
## BEALLSVILLE 10 47 55 13 22 BEALLSVILLE
## BETHESDA 3539 21062 11918 2595 8667 BETHESDA
## BOYDS 518 1420 755 233 530 BOYDS
## BRINKLOW 30 119 71 15 34 BRINKLOW
# Total number of incidents per city (sum over all priority codes).
data2pcRS <- rowSums(data2pctable)
head(data2pcRS)
## ASHTON-SANDY SPRING BARNESVILLE BEALLSVILLE BETHESDA
## 1130 113 147 47781
## BOYDS BRINKLOW
## 3456 269
# Distribution of the per-city totals.
summary(data2pcRS)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 113 398 5769 25083 21128 211858
# Average count per priority code within each city (row total / 5 codes).
rowMeans(data2pctable)
## ASHTON-SANDY SPRING BARNESVILLE BEALLSVILLE BETHESDA
## 226.0 22.6 29.4 9556.2
## BOYDS BRINKLOW CABIN JOHN CHEVY CHASE
## 691.2 53.8 136.2 3138.4
## CLARKSBURG DAMASCUS DERWOOD DICKERSON
## 1969.8 1153.8 2284.4 228.0
## GAITHERSBURG GARRETT PARK GERMANTOWN GLEN ECHO
## 17972.8 66.8 11717.4 35.6
## KENSINGTON MONTGOMERY VILLAGE OLNEY POOLESVILLE
## 2197.4 4225.6 2200.2 462.6
## POTOMAC ROCKVILLE SILVER SPRING SPENCERVILLE
## 4312.4 20246.4 42371.6 79.6
## WASHINGTON GROVE
## 39.0
# Total number of incidents per priority code (sum over all cities).
data2pcCS <- colSums(data2pctable)
head(data2pcCS)
## 0 1 2 3 4
## 60932 264985 148890 44494 107785
# Distribution of the per-priority totals.
summary(data2pcCS)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 44494 60932 107785 125417 148890 264985
# Average count per city for each priority code.
colMeans(data2pctable)
## 0 1 2 3 4
## 2437.28 10599.40 5955.60 1779.76 4311.40
# Severity / Year / Hour
# Keep incidents whose timestamp falls in 2017, 2018 or 2019. The test must
# be vectorised (one TRUE/FALSE per row): the scalar operator `||` only looks
# at the first row on older R versions (so nothing is actually filtered) and
# raises an error on R >= 4.3. `%in%` also treats a missing datetime as
# "not in range" instead of NA.
# NOTE: recorded output further down was produced before this fix; re-run the
# script to refresh it.
data2pc171819 <- filter(data2pc, year(datetime) %in% c(2017, 2018, 2019))
# Preview the first rows.
head(data2pc171819)
## End.Time Priority
## 1 4/2/2017 6:40 4
## 2 4/2/2017 8:52 1
## 3 4/2/2017 16:03 4
## 4 4/2/2017 11:34 2
## 5 9/14/2019 14:29 0
## 6 4/2/2017 12:13 4
## Close.Type city
## 1 MISC-ADMIN (DOCUMENT, LOST OR FOUND PROPERTY, MESSAGES, SILVER SPRING
## 2 ALARMB - ALARM BURGLARY/INTRUSION SILVER SPRING
## 3 MIS - MISC ON PATROL ROCKVILLE
## 4 DISTURBANCE/NUISANCE ROCKVILLE
## 5 TRAFFIC/TRANSPORTATION INCIDENT GERMANTOWN
## 6 MIS - MISC ON PATROL ROCKVILLE
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 4D 45 68 NA
## 2 3D 451 768 1022
## 3 1D 201 218 267
## 4 4D 98 249 992
## 5 5D 99 118 347
## 6 1D 57 306 NA
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 93 22 NA NA
## 2 1236 316 253 214
## 3 18846 17 48 18579
## 4 1278 150 743 286
## 5 4296 18 228 3949
## 6 556 248 NA NA
## datetime time date
## 1 2017-04-02 06:40:00 06:40:00 04/02/2017
## 2 2017-04-02 08:52:00 08:52:00 04/02/2017
## 3 2017-04-02 16:03:00 16:03:00 04/02/2017
## 4 2017-04-02 11:34:00 11:34:00 04/02/2017
## 5 2019-09-14 14:29:00 14:29:00 09/14/2019
## 6 2017-04-02 12:13:00 12:13:00 04/02/2017
# Derive the calendar year and the hour of day from the timestamp.
data2pc171819$year <- year(data2pc171819$datetime)
data2pc171819$hour <- hour(data2pc171819$datetime)
# Reduce the data to the three columns analysed below, in this order.
data2pc171819 <- data2pc171819[c("Priority", "year", "hour")]
# Priority was stored as text; turn it back into a number.
data2pc171819$Priority <- as.numeric(data2pc171819$Priority)
head(data2pc171819)
## Priority year hour
## 1 4 2017 6
## 2 1 2017 8
## 3 4 2017 16
## 4 2 2017 11
## 5 0 2019 14
## 6 4 2017 12
# Three-way count of incidents: priority x hour of day x year.
tbpc171819 <- table(
  data2pc171819[["Priority"]],
  data2pc171819[["hour"]],
  data2pc171819[["year"]]
)
head(tbpc171819)
## [1] 952 2749 1837 292 500 676
# Long format: one row per (priority, hour, year) combination with its count.
tbpc171819df <- as.data.frame(tbpc171819)
head(tbpc171819df)
## Var1 Var2 Var3 Freq
## 1 0 0 2017 952
## 2 1 0 2017 2749
## 3 2 0 2017 1837
## 4 3 0 2017 292
## 5 4 0 2017 500
## 6 0 1 2017 676
# Replace the default Var1/Var2/Var3 headers with descriptive names; the
# count column keeps its name "Freq".
colnames(tbpc171819df) <- c("severity", "hour", "year", "Freq")
head(tbpc171819df)
## severity hour year Freq
## 1 0 0 2017 952
## 2 1 0 2017 2749
## 3 2 0 2017 1837
## 4 3 0 2017 292
## 5 4 0 2017 500
## 6 0 1 2017 676
#barplot(tbpc171819)
# 2017 whole year: keep the rows whose datetime falls in 2017.
data2pc17 <- data2pc %>%
  filter(year(datetime) == 2017)
head(data2pc17)
## End.Time Priority
## 1 4/2/2017 6:40 4
## 2 4/2/2017 8:52 1
## 3 4/2/2017 16:03 4
## 4 4/2/2017 11:34 2
## 5 4/2/2017 12:13 4
## 6 4/2/2017 14:51 1
## Close.Type city
## 1 MISC-ADMIN (DOCUMENT, LOST OR FOUND PROPERTY, MESSAGES, SILVER SPRING
## 2 ALARMB - ALARM BURGLARY/INTRUSION SILVER SPRING
## 3 MIS - MISC ON PATROL ROCKVILLE
## 4 DISTURBANCE/NUISANCE ROCKVILLE
## 5 MIS - MISC ON PATROL ROCKVILLE
## 6 DECEASED PERSON KENSINGTON
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 4D 45 68 NA
## 2 3D 451 768 1022
## 3 1D 201 218 267
## 4 4D 98 249 992
## 5 1D 57 306 NA
## 6 2D 195 2882 3567
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 93 22 NA NA
## 2 1236 316 253 214
## 3 18846 17 48 18579
## 4 1278 150 743 286
## 5 556 248 NA NA
## 6 6426 2686 685 2859
## datetime time date
## 1 2017-04-02 06:40:00 06:40:00 04/02/2017
## 2 2017-04-02 08:52:00 08:52:00 04/02/2017
## 3 2017-04-02 16:03:00 16:03:00 04/02/2017
## 4 2017-04-02 11:34:00 11:34:00 04/02/2017
## 5 2017-04-02 12:13:00 12:13:00 04/02/2017
## 6 2017-04-02 14:51:00 14:51:00 04/02/2017
# Cross-tabulate priority (rows) against city (columns) for 2017, then
# total the calls at each priority level.
pc17table <- table(data2pc17[["Priority"]], data2pc17[["city"]])
rowSums(pc17table)
## 0 1 2 3 4
## 17620 69158 44132 10443 28196
# Heat-map input for 2017: priority and hour of day of every call.
# Columns are picked by name rather than by position so the result does not
# silently change if the column order of data2pc17 changes.
hmdfpc17 <- data2pc17["Priority"]
hmdfpc17$hour <- hour(data2pc17$datetime)
hmdfpc17$Priority <- as.numeric(hmdfpc17$Priority)
head(hmdfpc17)
## Priority hour
## 1 4 6
## 2 1 8
## 3 4 16
## 4 2 11
## 5 4 12
## 6 1 14
# 2017 call counts: priority (rows) by hour of day (columns).
tbpc17 <- table(hmdfpc17[["Priority"]], hmdfpc17[["hour"]])
head(tbpc17)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 0 952 676 554 473 369 279 309 359 451 529 622 611 683 743 765
## 1 2749 2099 1704 1353 1176 1105 1091 1721 2351 2818 3112 3217 3465 3463 3407
## 2 1837 1423 954 710 588 472 531 976 1459 1680 1708 1808 2012 2243 2155
## 3 292 228 172 113 88 74 71 140 268 421 509 572 562 619 570
## 4 500 337 184 163 133 120 203 622 1092 1477 1780 1889 1963 1819 1684
##
## 15 16 17 18 19 20 21 22 23
## 0 992 1007 964 1049 1068 1048 1072 1089 956
## 1 3510 4132 3888 3850 4065 3853 4002 3807 3220
## 2 2449 2936 3000 2910 2670 2290 2342 2727 2252
## 3 584 734 708 720 724 653 636 592 393
## 4 1815 2348 2035 1685 1658 1463 1265 1212 749
# Calls per hour and a kernel density estimate of the hour values (2017).
# NOTE: the object `density` shares its name with stats::density(); R still
# finds the function when `density(...)` is called.
hourtable <- table(hmdfpc17[["hour"]])
density <- density(hmdfpc17[["hour"]])
# Stacked bar chart: one bar per hour, split by priority.
barplot(tbpc17)
# Long format (severity, hour, Freq) for plotting with ggplot2.
tbpc17df <- as.data.frame(tbpc17)
names(tbpc17df)[match(c("Var1", "Var2"), names(tbpc17df))] <-
  c("severity", "hour")
head(tbpc17df)
## severity hour Freq
## 1 0 0 952
## 2 1 0 2749
## 3 2 0 1837
## 4 3 0 292
## 5 4 0 500
## 6 0 1 676
# Heat map of 2017 call counts: hour on x, severity on y, count as the fill
# colour and printed inside each tile.
htpc17 <- ggplot(
  data = tbpc17df,
  mapping = aes(x = hour, y = severity, fill = Freq)
) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "orange") +
  geom_text(mapping = aes(label = Freq), size = 3) +
  labs(title = "2017 Hourly Crime per Severity (0 = most dangerous)")
htpc17
# Split each day into four six-hour windows.
# 2017 Day1: 00:00 to 05:59
data2pc17td1 <- data2pc17 %>%
  filter(hour(datetime) >= 0, hour(datetime) < 6)
head(data2pc17td1)
## End.Time Priority Close.Type city
## 1 4/3/2017 1:15 3 MISSING, RUNAWAY, FOUND PERSON ROCKVILLE
## 2 4/3/2017 3:03 1 MENTAL DISORDER GAITHERSBURG
## 3 4/3/2017 4:18 0 SUICIDAL PERSON/ATTEMPTED SUICIDE SILVER SPRING
## 4 4/3/2017 5:53 1 RESCUE WITH FRS GAITHERSBURG
## 5 4/4/2017 0:59 1 RESCUE WITH FRS ROCKVILLE
## 6 4/4/2017 1:21 2 NON-PRIORITY RESPONSE TRANSPORT POTOMAC
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 1D 401 656 NA
## 2 6D 322 447 604
## 3 3D 131 191 2227
## 4 6D 264 376 976
## 5 1D 228 670 951
## 6 1D 260 410 NA
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 1431 255 NA NA
## 2 2059 124 157 1455
## 3 4588 60 2035 2361
## 4 1803 112 600 826
## 5 1175 441 280 224
## 6 758 149 NA NA
## datetime time date
## 1 2017-04-03 01:15:00 01:15:00 04/03/2017
## 2 2017-04-03 03:03:00 03:03:00 04/03/2017
## 3 2017-04-03 04:18:00 04:18:00 04/03/2017
## 4 2017-04-03 05:53:00 05:53:00 04/03/2017
## 5 2017-04-04 00:59:00 00:59:00 04/04/2017
## 6 2017-04-04 01:21:00 01:21:00 04/04/2017
# Priority and hour of day for the 2017 calls in the 00:00-05:59 window.
# Columns are picked by name rather than by position so the result does not
# silently change if the column order of the source data changes.
day1pc17 <- data2pc17td1["Priority"]
day1pc17$hour <- hour(data2pc17td1$datetime)
day1pc17$Priority <- as.numeric(day1pc17$Priority)
head(day1pc17)
## Priority hour
## 1 3 1
## 2 1 3
## 3 0 4
## 4 1 5
## 5 1 0
## 6 2 1
# Priority (rows) by hour (columns) counts, 2017, 00:00-05:59.
d1pc17table <- table(day1pc17[["Priority"]], day1pc17[["hour"]])
head(d1pc17table)
##
## 0 1 2 3 4 5
## 0 952 676 554 473 369 279
## 1 2749 2099 1704 1353 1176 1105
## 2 1837 1423 954 710 588 472
## 3 292 228 172 113 88 74
## 4 500 337 184 163 133 120
# Calls per hour and a kernel density estimate of the hour values.
d1pc17hrtable <- table(day1pc17[["hour"]])
d1pc17density <- density(day1pc17[["hour"]])
# Stacked bar chart: one bar per hour, split by priority.
barplot(d1pc17table)
# Long format (severity, hour, Freq) for plotting with ggplot2.
d1pc17tbdf <- as.data.frame(d1pc17table)
names(d1pc17tbdf)[match(c("Var1", "Var2"), names(d1pc17tbdf))] <-
  c("severity", "hour")
head(d1pc17tbdf)
## severity hour Freq
## 1 0 0 952
## 2 1 0 2749
## 3 2 0 1837
## 4 3 0 292
## 5 4 0 500
## 6 0 1 676
# 2017 Day2: 06:00 to 11:59
data2pc17td2 <- data2pc17 %>%
  filter(hour(datetime) >= 6, hour(datetime) < 12)
head(data2pc17td2)
## End.Time Priority
## 1 4/2/2017 6:40 4
## 2 4/2/2017 8:52 1
## 3 4/2/2017 11:34 2
## 4 4/3/2017 9:17 1
## 5 4/3/2017 10:03 1
## 6 4/3/2017 10:50 2
## Close.Type city
## 1 MISC-ADMIN (DOCUMENT, LOST OR FOUND PROPERTY, MESSAGES, SILVER SPRING
## 2 ALARMB - ALARM BURGLARY/INTRUSION SILVER SPRING
## 3 DISTURBANCE/NUISANCE ROCKVILLE
## 4 DECEASED PERSON GERMANTOWN
## 5 BOX ALARM - VIA FRS GAITHERSBURG
## 6 FRAUD/DECEPTION ROCKVILLE
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 4D 45 68 NA
## 2 3D 451 768 1022
## 3 4D 98 249 992
## 4 5D 62 291 599
## 5 6D 82 456 757
## 6 1D 79 542 725
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 93 22 NA NA
## 2 1236 316 253 214
## 3 1278 150 743 286
## 4 12877 229 307 12278
## 5 1532 374 300 775
## 6 3799 462 183 3073
## datetime time date
## 1 2017-04-02 06:40:00 06:40:00 04/02/2017
## 2 2017-04-02 08:52:00 08:52:00 04/02/2017
## 3 2017-04-02 11:34:00 11:34:00 04/02/2017
## 4 2017-04-03 09:17:00 09:17:00 04/03/2017
## 5 2017-04-03 10:03:00 10:03:00 04/03/2017
## 6 2017-04-03 10:50:00 10:50:00 04/03/2017
# Priority and hour of day for the 2017 calls in the 06:00-11:59 window.
# Columns are picked by name rather than by position so the result does not
# silently change if the column order of the source data changes.
day2pc17 <- data2pc17td2["Priority"]
day2pc17$hour <- hour(data2pc17td2$datetime)
day2pc17$Priority <- as.numeric(day2pc17$Priority)
head(day2pc17)
## Priority hour
## 1 4 6
## 2 1 8
## 3 2 11
## 4 1 9
## 5 1 10
## 6 2 10
# Priority (rows) by hour (columns) counts, 2017, 06:00-11:59.
d2pc17table <- table(day2pc17[["Priority"]], day2pc17[["hour"]])
head(d2pc17table)
##
## 6 7 8 9 10 11
## 0 309 359 451 529 622 611
## 1 1091 1721 2351 2818 3112 3217
## 2 531 976 1459 1680 1708 1808
## 3 71 140 268 421 509 572
## 4 203 622 1092 1477 1780 1889
# Calls per hour and a kernel density estimate of the hour values.
d2pc17hrtable <- table(day2pc17[["hour"]])
d2pc17density <- density(day2pc17[["hour"]])
# Stacked bar chart: one bar per hour, split by priority.
barplot(d2pc17table)
# Long format (severity, hour, Freq) for plotting with ggplot2.
d2pc17tbdf <- as.data.frame(d2pc17table)
names(d2pc17tbdf)[match(c("Var1", "Var2"), names(d2pc17tbdf))] <-
  c("severity", "hour")
head(d2pc17tbdf)
## severity hour Freq
## 1 0 6 309
## 2 1 6 1091
## 3 2 6 531
## 4 3 6 71
## 5 4 6 203
## 6 0 7 359
# 2017 Day3: 12:00 to 17:59
data2pc17tn1 <- data2pc17 %>%
  filter(hour(datetime) >= 12, hour(datetime) < 18)
head(data2pc17tn1)
## End.Time Priority Close.Type city
## 1 4/2/2017 16:03 4 MIS - MISC ON PATROL ROCKVILLE
## 2 4/2/2017 12:13 4 MIS - MISC ON PATROL ROCKVILLE
## 3 4/2/2017 14:51 1 DECEASED PERSON KENSINGTON
## 4 4/2/2017 14:36 1 DISTURBANCE/NUISANCE MONTGOMERY VILLAGE
## 5 4/2/2017 16:05 2 STATION RESPONSE SILVER SPRING
## 6 4/2/2017 17:15 0 SUICIDAL PERSON/ATTEMPTED SUICIDE SILVER SPRING
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 1D 201 218 267
## 2 1D 57 306 NA
## 3 2D 195 2882 3567
## 4 6D 141 451 878
## 5 4D 101 500 520
## 6 2D 99 156 588
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 18846 17 48 18579
## 2 556 248 NA NA
## 3 6426 2686 685 2859
## 4 1393 309 427 515
## 5 2750 399 19 2229
## 6 3908 56 431 3320
## datetime time date
## 1 2017-04-02 16:03:00 16:03:00 04/02/2017
## 2 2017-04-02 12:13:00 12:13:00 04/02/2017
## 3 2017-04-02 14:51:00 14:51:00 04/02/2017
## 4 2017-04-02 14:36:00 14:36:00 04/02/2017
## 5 2017-04-02 16:05:00 16:05:00 04/02/2017
## 6 2017-04-02 17:15:00 17:15:00 04/02/2017
# Priority and hour of day for the 2017 calls in the 12:00-17:59 window.
# Columns are picked by name rather than by position so the result does not
# silently change if the column order of the source data changes.
day3pc17 <- data2pc17tn1["Priority"]
day3pc17$hour <- hour(data2pc17tn1$datetime)
day3pc17$Priority <- as.numeric(day3pc17$Priority)
head(day3pc17)
## Priority hour
## 1 4 16
## 2 4 12
## 3 1 14
## 4 1 14
## 5 2 16
## 6 0 17
# Priority (rows) by hour (columns) counts, 2017, 12:00-17:59.
d3pc17table <- table(day3pc17[["Priority"]], day3pc17[["hour"]])
head(d3pc17table)
##
## 12 13 14 15 16 17
## 0 683 743 765 992 1007 964
## 1 3465 3463 3407 3510 4132 3888
## 2 2012 2243 2155 2449 2936 3000
## 3 562 619 570 584 734 708
## 4 1963 1819 1684 1815 2348 2035
# Calls per hour and a kernel density estimate of the hour values.
d3pc17hrtable <- table(day3pc17[["hour"]])
d3pc17density <- density(day3pc17[["hour"]])
# Stacked bar chart: one bar per hour, split by priority.
barplot(d3pc17table)
# Long format (severity, hour, Freq) for plotting with ggplot2.
d3pc17tbdf <- as.data.frame(d3pc17table)
names(d3pc17tbdf)[match(c("Var1", "Var2"), names(d3pc17tbdf))] <-
  c("severity", "hour")
head(d3pc17tbdf)
## severity hour Freq
## 1 0 12 683
## 2 1 12 3465
## 3 2 12 2012
## 4 3 12 562
## 5 4 12 1963
## 6 0 13 743
# 2017 Day4: 18:00 to 23:59
data2pc17tn2 <- data2pc17 %>%
  filter(hour(datetime) >= 18, hour(datetime) < 24)
head(data2pc17tn2)
## End.Time Priority Close.Type city
## 1 4/2/2017 18:59 2 NON-PRIORITY RESPONSE TRANSPORT POTOMAC
## 2 4/2/2017 18:13 1 RESCUE WITH FRS SILVER SPRING
## 3 4/2/2017 23:57 2 ASSIST OTHER AGENCY GAITHERSBURG
## 4 4/2/2017 20:35 2 THEFT/LARCENY POTOMAC
## 5 4/2/2017 22:40 2 NON-PRIORITY RESPONSE TRANSPORT POTOMAC
## 6 4/2/2017 22:16 2 TRAFFIC VIOLATION SILVER SPRING
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 1D 0 2085 4612
## 2 3D 139 217 NA
## 3 6D 0 17533 NA
## 4 1D 323 3300 4237
## 5 1D 197 2997 3847
## 6 3D 47 394 NA
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 9423 2085 2527 4811
## 2 1103 77 NA NA
## 3 17630 17533 NA NA
## 4 5464 2976 936 1226
## 5 6822 2799 849 2974
## 6 1967 347 NA NA
## datetime time date
## 1 2017-04-02 18:59:00 18:59:00 04/02/2017
## 2 2017-04-02 18:13:00 18:13:00 04/02/2017
## 3 2017-04-02 23:57:00 23:57:00 04/02/2017
## 4 2017-04-02 20:35:00 20:35:00 04/02/2017
## 5 2017-04-02 22:40:00 22:40:00 04/02/2017
## 6 2017-04-02 22:16:00 22:16:00 04/02/2017
# Priority and hour of day for the 2017 calls in the 18:00-23:59 window.
# Columns are picked by name rather than by position so the result does not
# silently change if the column order of the source data changes.
day4pc17 <- data2pc17tn2["Priority"]
day4pc17$hour <- hour(data2pc17tn2$datetime)
day4pc17$Priority <- as.numeric(day4pc17$Priority)
head(day4pc17)
## Priority hour
## 1 2 18
## 2 1 18
## 3 2 23
## 4 2 20
## 5 2 22
## 6 2 22
# Priority (rows) by hour (columns) counts, 2017, 18:00-23:59.
d4pc17table <- table(day4pc17[["Priority"]], day4pc17[["hour"]])
head(d4pc17table)
##
## 18 19 20 21 22 23
## 0 1049 1068 1048 1072 1089 956
## 1 3850 4065 3853 4002 3807 3220
## 2 2910 2670 2290 2342 2727 2252
## 3 720 724 653 636 592 393
## 4 1685 1658 1463 1265 1212 749
# Calls per hour and a kernel density estimate of the hour values.
d4pc17hrtable <- table(day4pc17[["hour"]])
d4pc17density <- density(day4pc17[["hour"]])
# Stacked bar chart: one bar per hour, split by priority.
barplot(d4pc17table)
# Long format (severity, hour, Freq) for plotting with ggplot2.
d4pc17tbdf <- as.data.frame(d4pc17table)
names(d4pc17tbdf)[match(c("Var1", "Var2"), names(d4pc17tbdf))] <-
  c("severity", "hour")
head(d4pc17tbdf)
## severity hour Freq
## 1 0 18 1049
## 2 1 18 3850
## 3 2 18 2910
## 4 3 18 720
## 5 4 18 1685
## 6 0 19 1068
# Interactive violin plot of the hourly call counts per severity for the
# 18:00-23:59 window of 2017, with the median of each group marked in red.
# `fun` replaces the deprecated `fun.y`; the `mult` argument was dropped
# because stat_summary() does not recognise it and ignored it.
vd4pc17tbdf <- {
  ggplot(d4pc17tbdf, aes(x = severity, y = Freq, fill = severity)) +
    geom_violin(trim = FALSE) +
    stat_summary(fun = median, geom = "pointrange", color = "red")
} %>%
  ggplotly() %>%
  config(displayModeBar = FALSE)
vd4pc17tbdf
# Stack the four six-hour tables into one 24-hour table for 2017.
day1234pc17 <- rbind(d1pc17tbdf, d2pc17tbdf, d3pc17tbdf, d4pc17tbdf)
head(day1234pc17)
## severity hour Freq
## 1 0 0 952
## 2 1 0 2749
## 3 2 0 1837
## 4 3 0 292
## 5 4 0 500
## 6 0 1 676
# Faceted line chart of 2017 hourly call counts, one panel per severity.
# `text` is not a ggplot2 aesthetic (ggplot2 warns and ignores it); it holds
# the HTML string meant for a plotly hover tooltip.
# NOTE(review): `labels` is not defined in this part of the script; confirm
# it is created earlier, otherwise it resolves to base::labels().
day1234pc17gg <- ggplot(
  data = day1234pc17,
  mapping = aes(
    x = hour, y = Freq, group = severity,
    shape = severity, color = severity
  )
) +
  geom_line() +
  geom_point(
    mapping = aes(
      text = paste(
        "Hour: ", hour, "<br>",
        "Frequency: ", Freq, "<br>",
        "Severity: ", severity
      )
    ),
    data = day1234pc17,
    size = 2
  ) +
  labs(title = "2017 Hourly Frequency of Severity") +
  theme(axis.text.x = element_text(size = 5)) +
  scale_x_discrete(name = "Hour", breaks = labels, labels = labels) +
  scale_y_continuous(name = "Frequency") +
  facet_grid(. ~ severity)
## Warning: Ignoring unknown aesthetics: text
day1234pc17gg
# Interactive version: hover shows only the custom `text` tooltip and the
# plotly mode bar is hidden. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
day1234pc17gp <- ggplotly(day1234pc17gg, tooltip = "text") %>%
  config(displayModeBar = FALSE)
day1234pc17gp
# Single-panel line chart of 2017 hourly call counts, one line per severity.
# `text` is not a ggplot2 aesthetic (ggplot2 warns and ignores it); it holds
# the HTML string meant for a plotly hover tooltip.
# NOTE(review): `labels` is not defined in this part of the script; confirm
# it is created earlier, otherwise it resolves to base::labels().
day1234pc17gg2 <- ggplot(
  data = day1234pc17,
  mapping = aes(
    x = hour, y = Freq, group = severity,
    shape = severity, color = severity
  )
) +
  geom_line() +
  geom_point(
    mapping = aes(
      text = paste(
        "Hour: ", hour, "<br>",
        "Frequency: ", Freq, "<br>",
        "Severity: ", severity
      )
    ),
    data = day1234pc17,
    size = 2.5
  ) +
  labs(title = "2017 Hourly Frequency of Severity") +
  theme(axis.text.x = element_text(size = 10)) +
  scale_x_discrete(name = "Hour", breaks = labels, labels = labels) +
  scale_y_continuous(name = "Frequency")
## Warning: Ignoring unknown aesthetics: text
day1234pc17gg2
# Interactive version: hover shows only the custom `text` tooltip and the
# plotly mode bar is hidden. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
day1234pc17gp2 <- ggplotly(day1234pc17gg2, tooltip = "text") %>%
  config(displayModeBar = FALSE)
day1234pc17gp2
# City 2017: call counts with one row per city and one column per priority.
# Naming the arguments labels the two table dimensions directly.
data2pc17tb <- table(city = data2pc17$city, Priority = data2pc17$Priority)
head(data2pc17tb)
## Priority
## city 0 1 2 3 4
## ASHTON-SANDY SPRING 26 121 69 28 51
## BARNESVILLE 2 12 6 3 5
## BEALLSVILLE 1 13 10 4 11
## BETHESDA 945 5727 3422 640 2248
## BOYDS 144 343 213 65 139
## BRINKLOW 8 19 17 7 10
# Merge with population: full outer join of the 2017 city x priority counts
# with the population table on their shared column(s) (presumably `city`;
# confirm against data3popc). `rate` is calls per 100 residents (2010
# census), rounded to one decimal.
data2317tb <- merge(x = data2pc17tb, y = data3popc, all = TRUE)
data2317tb$rate <- with(
  data2317tb,
  round(Freq / Census.2010 * 100, digits = 1)
)
head(data2317tb)
## city Priority Freq Census.2010 rate
## 1 ASHTON-SANDY SPRING 0 26 5628 0.5
## 2 ASHTON-SANDY SPRING 3 28 5628 0.5
## 3 ASHTON-SANDY SPRING 2 69 5628 1.2
## 4 ASHTON-SANDY SPRING 4 51 5628 0.9
## 5 ASHTON-SANDY SPRING 1 121 5628 2.1
## 6 BARNESVILLE 0 2 172 1.2
# Total 2017 calls per city (row sums of the city x priority table).
data2pc17sum <- rowSums(data2pc17tb)
head(data2pc17sum)
## ASHTON-SANDY SPRING BARNESVILLE BEALLSVILLE BETHESDA
## 295 28 39 12982
## BOYDS BRINKLOW
## 904 61
summary(data2pc17sum)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 28 121 1531 6782 5752 56826
# Data-frame version with columns Total and city; the city names also stay
# as row names. The value column is renamed directly: assigning
# names(dimnames(<data.frame>)) does not change column names, so the earlier
# copy-then-drop workaround is not needed.
data2pc17sumdf <- as.data.frame(data2pc17sum)
names(data2pc17sumdf) <- "Total"
data2pc17sumdf$city <- row.names(data2pc17sumdf)
head(data2pc17sumdf)
## Total city
## ASHTON-SANDY SPRING 295 ASHTON-SANDY SPRING
## BARNESVILLE 28 BARNESVILLE
## BEALLSVILLE 39 BEALLSVILLE
## BETHESDA 12982 BETHESDA
## BOYDS 904 BOYDS
## BRINKLOW 61 BRINKLOW
# Priority 2017: call counts with one row per priority and one column per
# city. Naming the arguments labels the two table dimensions directly.
data2pc17tb2 <- table(Priority = data2pc17$Priority, city = data2pc17$city)
head(data2pc17tb2)
## city
## Priority ASHTON-SANDY SPRING BARNESVILLE BEALLSVILLE BETHESDA BOYDS BRINKLOW
## 0 26 2 1 945 144 8
## 1 121 12 13 5727 343 19
## 2 69 6 10 3422 213 17
## 3 28 3 4 640 65 7
## 4 51 5 11 2248 139 10
## city
## Priority CABIN JOHN CHEVY CHASE CLARKSBURG DAMASCUS DERWOOD DICKERSON
## 0 20 311 224 205 319 29
## 1 79 1923 1206 631 1060 104
## 2 29 1060 480 329 769 97
## 3 8 176 172 171 367 30
## 4 33 696 475 195 463 33
## city
## Priority GAITHERSBURG GARRETT PARK GERMANTOWN GLEN ECHO KENSINGTON
## 0 2682 5 2060 9 249
## 1 10046 46 6660 16 1395
## 2 6265 21 4442 9 644
## 3 1551 4 1056 0 200
## 4 3978 15 2421 10 540
## city
## Priority MONTGOMERY VILLAGE OLNEY POOLESVILLE POTOMAC ROCKVILLE SILVER SPRING
## 0 755 267 66 465 2451 6365
## 1 2511 1361 315 2973 10811 21718
## 2 1246 686 130 1120 6545 16481
## 3 394 246 68 363 1687 3183
## 4 1053 537 81 831 5263 9079
## city
## Priority SPENCERVILLE WASHINGTON GROVE
## 0 8 4
## 1 48 20
## 2 32 10
## 3 14 6
## 4 19 10
# Total 2017 calls per priority level (row sums of the priority x city table).
data2pc17sum2 <- rowSums(data2pc17tb2)
head(data2pc17sum2)
## 0 1 2 3 4
## 17620 69158 44132 10443 28196
summary(data2pc17sum2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10443 17620 28196 33910 44132 69158
# Data-frame version with columns Total and Severity; the priority levels
# also stay as row names. The value column is renamed directly instead of
# being read back as `$data2pc17sum`, which only worked through `$` partial
# matching against the real column name `data2pc17sum2`.
data2pc17sumdf2 <- as.data.frame(data2pc17sum2)
names(data2pc17sumdf2) <- "Total"
data2pc17sumdf2$Severity <- row.names(data2pc17sumdf2)
head(data2pc17sumdf2)
## Total Severity
## 0 17620 0
## 1 69158 1
## 2 44132 2
## 3 10443 3
## 4 28196 4
# Convert the city x priority table to a long data frame and drop its first
# column, leaving Priority and Freq.
data2pc17df <- as.data.frame(data2pc17tb)[-1L]
head(data2pc17df)
## Priority Freq
## 1 0 26
## 2 0 2
## 3 0 1
## 4 0 945
## 5 0 144
## 6 0 8
# Box plot (five-number summary plus outliers) of the 2017 per-city call
# counts, one box per severity level.
pcbox17 <- ggplot(data2pc17df, aes(x = Priority, y = Freq, fill = Priority)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 2) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.9),
    legend.position = "right"
  ) +
  labs(
    title = "2017 Crime Count of Severity",
    x = "Severity",
    y = "Count",
    fill = "Severity"
  )
pcbox17
# Interactive version with the plotly mode bar hidden. FALSE is spelled out
# because `F` is an ordinary variable that can be reassigned.
pcbox17gg <- ggplotly(pcbox17) %>% config(displayModeBar = FALSE)
pcbox17gg
# Show five numbers and outliers with rate
head(data2317tb, 10)
## city Priority Freq Census.2010 rate
## 1 ASHTON-SANDY SPRING 0 26 5628 0.5
## 2 ASHTON-SANDY SPRING 3 28 5628 0.5
## 3 ASHTON-SANDY SPRING 2 69 5628 1.2
## 4 ASHTON-SANDY SPRING 4 51 5628 0.9
## 5 ASHTON-SANDY SPRING 1 121 5628 2.1
## 6 BARNESVILLE 0 2 172 1.2
## 7 BARNESVILLE 2 6 172 3.5
## 8 BARNESVILLE 4 5 172 2.9
## 9 BARNESVILLE 1 12 172 7.0
## 10 BARNESVILLE 3 3 172 1.7
# Box plot of the 2017 per-city call rate (calls per 100 residents), one box
# per severity level.
data23box17 <- ggplot(data2317tb, aes(x = Priority, y = rate, fill = Priority)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 2) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.9),
    legend.position = "right"
  ) +
  labs(
    title = "2017 Crime Rate by Severity",
    x = "Severity",
    y = "Rate",
    fill = "Severity"
  )
data23box17
# Interactive version with the plotly mode bar hidden. FALSE is spelled out
# because `F` is an ordinary variable that can be reassigned.
data23box17gg <- ggplotly(data23box17) %>% config(displayModeBar = FALSE)
data23box17gg
# 2018 whole year: keep the rows whose datetime falls in 2018.
data2pc18 <- data2pc %>%
  filter(year(datetime) == 2018)
head(data2pc18)
## End.Time Priority
## 1 4/27/2018 18:23 1
## 2 5/2/2018 8:51 4
## 3 10/19/2018 19:48 0
## 4 8/9/2018 13:21 4
## 5 12/6/2018 13:46 1
## 6 5/24/2018 9:11 1
## Close.Type city
## 1 WORKING CODE SILVER SPRING
## 2 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT BETHESDA
## 3 TRAFFIC/TRANSPORTATION INCIDENT SILVER SPRING
## 4 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT BETHESDA
## 5 SUSPICIOUS CIRC, PERSONS, VEHICLE GERMANTOWN
## 6 CHECK WELFARE SILVER SPRING
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 4D 98 209 503
## 2 2D 64 1564 NA
## 3 3D 103 153 335
## 4 2D 67 796 NA
## 5 1D 138 361 1192
## 6 3D 204 314 1082
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 13964 111 293 13461
## 2 4300 1500 NA NA
## 3 3351 50 181 3016
## 4 2063 729 NA NA
## 5 6618 223 830 5426
## 6 28297 110 767 27215
## datetime time date
## 1 2018-04-27 18:23:00 18:23:00 04/27/2018
## 2 2018-05-02 08:51:00 08:51:00 05/02/2018
## 3 2018-10-19 19:48:00 19:48:00 10/19/2018
## 4 2018-08-09 13:21:00 13:21:00 08/09/2018
## 5 2018-12-06 13:46:00 13:46:00 12/06/2018
## 6 2018-05-24 09:11:00 09:11:00 05/24/2018
# Heat-map input for 2018: priority and hour of day of every call.
# Columns are picked by name rather than by position so the result does not
# silently change if the column order of data2pc18 changes.
hmdfpc18 <- data2pc18["Priority"]
hmdfpc18$hour <- hour(data2pc18$datetime)
hmdfpc18$Priority <- as.numeric(hmdfpc18$Priority)
head(hmdfpc18)
## Priority hour
## 1 1 18
## 2 4 8
## 3 0 19
## 4 4 13
## 5 1 13
## 6 1 9
# 2018 call counts: priority (rows) by hour of day (columns).
tbpc18 <- table(hmdfpc18[["Priority"]], hmdfpc18[["hour"]])
head(tbpc18)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 0 979 782 646 508 405 337 314 398 579 680 698 802 809 857 879
## 1 3708 2835 2234 1846 1666 1497 1438 2285 3124 3747 4265 4334 4674 4637 4524
## 2 1484 1012 828 641 576 515 620 1200 1780 2118 2132 2309 2584 2795 2727
## 3 877 570 378 245 175 113 90 246 372 503 642 684 673 775 792
## 4 609 371 244 174 179 182 253 774 1430 2060 2350 2390 2439 2565 2325
##
## 15 16 17 18 19 20 21 22 23
## 0 1186 1172 1141 1281 1202 1131 1164 1192 1109
## 1 4522 5605 5235 5213 5296 5057 5127 4992 4379
## 2 2864 3852 3539 3593 3087 2501 2386 2235 1823
## 3 744 884 929 922 880 881 1061 1246 1091
## 4 1932 2814 2445 2193 2145 1820 1529 1344 947
# Calls per hour and a kernel density estimate of the hour values (2018).
# Both assignments overwrite the 2017 objects of the same name.
# NOTE: the object `density` shares its name with stats::density(); R still
# finds the function when `density(...)` is called.
hourtable <- table(hmdfpc18[["hour"]])
density <- density(hmdfpc18[["hour"]])
# Stacked bar chart: one bar per hour, split by priority.
barplot(tbpc18)
# Long format (severity, hour, Freq) for plotting with ggplot2.
tbpc18df <- as.data.frame(tbpc18)
names(tbpc18df)[match(c("Var1", "Var2"), names(tbpc18df))] <-
  c("severity", "hour")
head(tbpc18df)
## severity hour Freq
## 1 0 0 979
## 2 1 0 3708
## 3 2 0 1484
## 4 3 0 877
## 5 4 0 609
## 6 0 1 782
# Heat map of 2018 call counts: hour on x, severity on y, count as the fill
# colour and printed inside each tile.
htpc18 <- ggplot(
  data = tbpc18df,
  mapping = aes(x = hour, y = severity, fill = Freq)
) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "blue") +
  geom_text(mapping = aes(label = Freq), size = 3) +
  labs(title = "2018 Hourly Crime per Severity (0 = most dangerous)")
htpc18
# Split each day into four six-hour windows.
# 2018 Day1: 00:00 to 05:59
data2pc18td1 <- data2pc18 %>%
  filter(hour(datetime) >= 0, hour(datetime) < 6)
head(data2pc18td1)
## End.Time Priority Close.Type
## 1 8/21/2018 0:37 1 HARASSMENT, STALKING, THREATS
## 2 4/23/2018 3:01 1 SUSPICIOUS CIRC, PERSONS, VEHICLE
## 3 1/15/2018 5:43 2 THEFT/LARCENY
## 4 2/27/2018 1:54 3 SEXUAL ASSAULT - OCCURRED EARLIER
## 5 4/19/2018 1:56 1 WORKING CODE
## 6 5/16/2018 0:04 3 ABUSE, ABANDONMENT, NEGLECT - OCCURRED EARLIER
## city Police.District.Number CallTime.CallRoute
## 1 ROCKVILLE 1D 287
## 2 MONTGOMERY VILLAGE 6D 78
## 3 SILVER SPRING 3D 45
## 4 SILVER SPRING 3D 299
## 5 ROCKVILLE 4D 142
## 6 ROCKVILLE 6D 120
## Calltime.Dispatch Calltime.Arrive Calltime.Cleared CallRoute.Dispatch
## 1 417 461 3782 130
## 2 115 381 7722 36
## 3 137 543 1722 91
## 4 619 2261 6982 319
## 5 203 469 17545 60
## 6 556 1135 7742 436
## Dispatch.Arrive Arrive.Cleared datetime time date
## 1 43 3321 2018-08-21 00:37:00 00:37:00 08/21/2018
## 2 266 7340 2018-04-23 03:01:00 03:01:00 04/23/2018
## 3 406 1179 2018-01-15 05:43:00 05:43:00 01/15/2018
## 4 1642 4721 2018-02-27 01:54:00 01:54:00 02/27/2018
## 5 265 17076 2018-04-19 01:56:00 01:56:00 04/19/2018
## 6 579 6606 2018-05-16 00:04:00 00:04:00 05/16/2018
# Priority and hour of day for the 2018 calls in the 00:00-05:59 window.
# Columns are picked by name rather than by position so the result does not
# silently change if the column order of the source data changes.
day1pc18 <- data2pc18td1["Priority"]
day1pc18$hour <- hour(data2pc18td1$datetime)
day1pc18$Priority <- as.numeric(day1pc18$Priority)
head(day1pc18)
## Priority hour
## 1 1 0
## 2 1 3
## 3 2 5
## 4 3 1
## 5 1 1
## 6 3 0
# Priority (rows) by hour (columns) counts, 2018, 00:00-05:59.
d1pc18table <- table(day1pc18[["Priority"]], day1pc18[["hour"]])
head(d1pc18table)
##
## 0 1 2 3 4 5
## 0 979 782 646 508 405 337
## 1 3708 2835 2234 1846 1666 1497
## 2 1484 1012 828 641 576 515
## 3 877 570 378 245 175 113
## 4 609 371 244 174 179 182
# Calls per hour and a kernel density estimate of the hour values.
d1pc18hrtable <- table(day1pc18[["hour"]])
d1pc18density <- density(day1pc18[["hour"]])
# Stacked bar chart: one bar per hour, split by priority.
barplot(d1pc18table)
# Long format (severity, hour, Freq) for plotting with ggplot2.
d1pc18tbdf <- as.data.frame(d1pc18table)
names(d1pc18tbdf)[match(c("Var1", "Var2"), names(d1pc18tbdf))] <-
  c("severity", "hour")
head(d1pc18tbdf)
## severity hour Freq
## 1 0 0 979
## 2 1 0 3708
## 3 2 0 1484
## 4 3 0 877
## 5 4 0 609
## 6 0 1 782
# 2018 Day2: 06:00 to 11:59
data2pc18td2 <- data2pc18 %>%
  filter(hour(datetime) >= 6, hour(datetime) < 12)
head(data2pc18td2)
## End.Time Priority
## 1 5/2/2018 8:51 4
## 2 5/24/2018 9:11 1
## 3 3/16/2018 6:04 0
## 4 2/2/2018 10:25 1
## 5 8/25/2018 11:28 3
## 6 9/14/2018 8:31 2
## Close.Type city
## 1 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT BETHESDA
## 2 CHECK WELFARE SILVER SPRING
## 3 SEXUAL ASSAULT GAITHERSBURG
## 4 CDS GAITHERSBURG
## 5 SEXUAL ASSAULT - OCCURRED EARLIER CHEVY CHASE
## 6 STABO - STABBING - OCCURRED EARLIER SILVER SPRING
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 2D 64 1564 NA
## 2 3D 204 314 1082
## 3 6D 125 158 237
## 4 1D 52 NA NA
## 5 2D 78 1808 2543
## 6 3D 171 242 2312
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 4300 1500 NA NA
## 2 28297 110 767 27215
## 3 18239 33 79 18001
## 4 52 NA NA NA
## 5 5882 1729 735 3338
## 6 17428 71 2070 15115
## datetime time date
## 1 2018-05-02 08:51:00 08:51:00 05/02/2018
## 2 2018-05-24 09:11:00 09:11:00 05/24/2018
## 3 2018-03-16 06:04:00 06:04:00 03/16/2018
## 4 2018-02-02 10:25:00 10:25:00 02/02/2018
## 5 2018-08-25 11:28:00 11:28:00 08/25/2018
## 6 2018-09-14 08:31:00 08:31:00 09/14/2018
# Priority and hour of day for the 2018 calls in the 06:00-11:59 window.
# Columns are picked by name rather than by position so the result does not
# silently change if the column order of the source data changes.
day2pc18 <- data2pc18td2["Priority"]
day2pc18$hour <- hour(data2pc18td2$datetime)
day2pc18$Priority <- as.numeric(day2pc18$Priority)
head(day2pc18)
## Priority hour
## 1 4 8
## 2 1 9
## 3 0 6
## 4 1 10
## 5 3 11
## 6 2 8
# Priority (rows) by hour (columns) counts, 2018, 06:00-11:59.
d2pc18table <- table(day2pc18[["Priority"]], day2pc18[["hour"]])
head(d2pc18table)
##
## 6 7 8 9 10 11
## 0 314 398 579 680 698 802
## 1 1438 2285 3124 3747 4265 4334
## 2 620 1200 1780 2118 2132 2309
## 3 90 246 372 503 642 684
## 4 253 774 1430 2060 2350 2390
# Calls per hour and a kernel density estimate of the hour values.
d2pc18hrtable <- table(day2pc18[["hour"]])
d2pc18density <- density(day2pc18[["hour"]])
# Stacked bar chart: one bar per hour, split by priority.
barplot(d2pc18table)
# Long format (severity, hour, Freq) for plotting with ggplot2.
d2pc18tbdf <- as.data.frame(d2pc18table)
names(d2pc18tbdf)[match(c("Var1", "Var2"), names(d2pc18tbdf))] <-
  c("severity", "hour")
head(d2pc18tbdf)
## severity hour Freq
## 1 0 6 314
## 2 1 6 1438
## 3 2 6 620
## 4 3 6 90
## 5 4 6 253
## 6 0 7 398
# 2018 Day3: 12:00 to 17:59
data2pc18tn1 <- data2pc18 %>%
  filter(hour(datetime) >= 12, hour(datetime) < 18)
head(data2pc18tn1)
## End.Time Priority
## 1 8/9/2018 13:21 4
## 2 12/6/2018 13:46 1
## 3 12/30/2018 15:06 4
## 4 10/10/2018 14:45 4
## 5 10/8/2018 13:44 4
## 6 10/28/2018 15:30 3
## Close.Type city
## 1 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT BETHESDA
## 2 SUSPICIOUS CIRC, PERSONS, VEHICLE GERMANTOWN
## 3 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT GAITHERSBURG
## 4 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT SILVER SPRING
## 5 FRAUD/DECEPTION - OCCURRED EARLIER ROCKVILLE
## 6 DOMESTIC DISTURBANCE/VIOLENCE - OCCURRED EARLIER ROCKVILLE
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 2D 67 796 NA
## 2 1D 138 361 1192
## 3 6D 301 2695 NA
## 4 4D 136 154 NA
## 5 1D 334 382 1766
## 6 1D 26 1130 2527
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 2063 729 NA NA
## 2 6618 223 830 5426
## 3 3829 2393 NA NA
## 4 1982 18 NA NA
## 5 5235 47 1384 3468
## 6 5211 1103 1397 2683
## datetime time date
## 1 2018-08-09 13:21:00 13:21:00 08/09/2018
## 2 2018-12-06 13:46:00 13:46:00 12/06/2018
## 3 2018-12-30 15:06:00 15:06:00 12/30/2018
## 4 2018-10-10 14:45:00 14:45:00 10/10/2018
## 5 2018-10-08 13:44:00 13:44:00 10/08/2018
## 6 2018-10-28 15:30:00 15:30:00 10/28/2018
# Priority and hour of day for the 2018 calls in the 12:00-17:59 window.
# Columns are picked by name rather than by position so the result does not
# silently change if the column order of the source data changes.
day3pc18 <- data2pc18tn1["Priority"]
day3pc18$hour <- hour(data2pc18tn1$datetime)
day3pc18$Priority <- as.numeric(day3pc18$Priority)
head(day3pc18)
## Priority hour
## 1 4 13
## 2 1 13
## 3 4 15
## 4 4 14
## 5 4 13
## 6 3 15
# Priority (rows) by hour (columns) counts, 2018, 12:00-17:59.
d3pc18table <- table(day3pc18[["Priority"]], day3pc18[["hour"]])
head(d3pc18table)
##
## 12 13 14 15 16 17
## 0 809 857 879 1186 1172 1141
## 1 4674 4637 4524 4522 5605 5235
## 2 2584 2795 2727 2864 3852 3539
## 3 673 775 792 744 884 929
## 4 2439 2565 2325 1932 2814 2445
# Calls per hour and a kernel density estimate of the hour values.
d3pc18hrtable <- table(day3pc18[["hour"]])
d3pc18density <- density(day3pc18[["hour"]])
# Stacked bar chart: one bar per hour, split by priority.
barplot(d3pc18table)
# Long format (severity, hour, Freq) for plotting with ggplot2.
d3pc18tbdf <- as.data.frame(d3pc18table)
names(d3pc18tbdf)[match(c("Var1", "Var2"), names(d3pc18tbdf))] <-
  c("severity", "hour")
head(d3pc18tbdf)
## severity hour Freq
## 1 0 12 809
## 2 1 12 4674
## 3 2 12 2584
## 4 3 12 673
## 5 4 12 2439
## 6 0 13 857
# 2018 Day4: 18:00 to 23:59
data2pc18tn2 <- data2pc18 %>%
  filter(hour(datetime) >= 18, hour(datetime) < 24)
head(data2pc18tn2)
## End.Time Priority Close.Type
## 1 4/27/2018 18:23 1 WORKING CODE
## 2 10/19/2018 19:48 0 TRAFFIC/TRANSPORTATION INCIDENT
## 3 10/12/2018 21:04 4 BURGLARY - OCCURRED EARLIER
## 4 5/23/2018 18:53 0 DOMESTIC DISTURBANCE/VIOLENCE - OCCURRED EARLIER
## 5 6/6/2018 22:34 0 BURGLARY JUST OCCURRED
## 6 5/3/2018 18:08 2 STATION RESPONSE
## city Police.District.Number CallTime.CallRoute Calltime.Dispatch
## 1 SILVER SPRING 4D 98 209
## 2 SILVER SPRING 3D 103 153
## 3 POTOMAC 1D 135 758
## 4 ROCKVILLE 1D 143 195
## 5 ROCKVILLE 4D 64 174
## 6 BETHESDA 2D 133 534
## Calltime.Arrive Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive
## 1 503 13964 111 293
## 2 335 3351 50 181
## 3 769 17419 623 10
## 4 214 10384 52 18
## 5 919 7074 110 745
## 6 674 5400 401 139
## Arrive.Cleared datetime time date
## 1 13461 2018-04-27 18:23:00 18:23:00 04/27/2018
## 2 3016 2018-10-19 19:48:00 19:48:00 10/19/2018
## 3 16650 2018-10-12 21:04:00 21:04:00 10/12/2018
## 4 10169 2018-05-23 18:53:00 18:53:00 05/23/2018
## 5 6154 2018-06-06 22:34:00 22:34:00 06/06/2018
## 6 4725 2018-05-03 18:08:00 18:08:00 05/03/2018
# Priority and hour of day for the 2018 calls in the 18:00-23:59 window.
# Columns are picked by name rather than by position so the result does not
# silently change if the column order of the source data changes.
day4pc18 <- data2pc18tn2["Priority"]
day4pc18$hour <- hour(data2pc18tn2$datetime)
day4pc18$Priority <- as.numeric(day4pc18$Priority)
head(day4pc18)
## Priority hour
## 1 1 18
## 2 0 19
## 3 4 21
## 4 0 18
## 5 0 22
## 6 2 18
# Priority (rows) by hour (columns) counts, 2018, 18:00-23:59.
d4pc18table <- table(day4pc18[["Priority"]], day4pc18[["hour"]])
head(d4pc18table)
##
## 18 19 20 21 22 23
## 0 1281 1202 1131 1164 1192 1109
## 1 5213 5296 5057 5127 4992 4379
## 2 3593 3087 2501 2386 2235 1823
## 3 922 880 881 1061 1246 1091
## 4 2193 2145 1820 1529 1344 947
# Calls per hour and a kernel density estimate of the hour values.
d4pc18hrtable <- table(day4pc18[["hour"]])
d4pc18density <- density(day4pc18[["hour"]])
# Stacked bar chart: one bar per hour, split by priority.
barplot(d4pc18table)
# Long format (severity, hour, Freq) for plotting with ggplot2.
d4pc18tbdf <- as.data.frame(d4pc18table)
names(d4pc18tbdf)[match(c("Var1", "Var2"), names(d4pc18tbdf))] <-
  c("severity", "hour")
head(d4pc18tbdf)
## severity hour Freq
## 1 0 18 1281
## 2 1 18 5213
## 3 2 18 3593
## 4 3 18 922
## 5 4 18 2193
## 6 0 19 1202
# Stack the four six-hour tables into one 24-hour table for 2018.
day1234pc18 <- rbind(d1pc18tbdf, d2pc18tbdf, d3pc18tbdf, d4pc18tbdf)
head(day1234pc18)
## severity hour Freq
## 1 0 0 979
## 2 1 0 3708
## 3 2 0 1484
## 4 3 0 877
## 5 4 0 609
## 6 0 1 782
# Faceted line chart of 2018 hourly call counts, one panel per severity.
# `text` is not a ggplot2 aesthetic (ggplot2 warns and ignores it); it holds
# the HTML string meant for a plotly hover tooltip.
# NOTE(review): `labels` is not defined in this part of the script; confirm
# it is created earlier, otherwise it resolves to base::labels().
day1234pc18gg <- ggplot(
  data = day1234pc18,
  mapping = aes(
    x = hour, y = Freq, group = severity,
    shape = severity, color = severity
  )
) +
  geom_line() +
  geom_point(
    mapping = aes(
      text = paste(
        "Hour: ", hour, "<br>",
        "Frequency: ", Freq, "<br>",
        "Severity: ", severity
      )
    ),
    data = day1234pc18,
    size = 2
  ) +
  labs(title = "2018 Hourly Frequency of Severity") +
  theme(axis.text.x = element_text(size = 5)) +
  scale_x_discrete(name = "Hour", breaks = labels, labels = labels) +
  scale_y_continuous(name = "Frequency") +
  facet_grid(. ~ severity)
## Warning: Ignoring unknown aesthetics: text
day1234pc18gg
# Interactive version: hover shows only the custom `text` tooltip and the
# plotly mode bar is hidden. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
day1234pc18gp <- ggplotly(day1234pc18gg, tooltip = "text") %>%
  config(displayModeBar = FALSE)
day1234pc18gp
# Single-panel line chart of 2018 hourly call counts, one line per severity.
# `text` is not a ggplot2 aesthetic (ggplot2 warns and ignores it); it holds
# the HTML string meant for a plotly hover tooltip.
# NOTE(review): `labels` is not defined in this part of the script; confirm
# it is created earlier, otherwise it resolves to base::labels().
day1234pc18gg2 <- ggplot(
  data = day1234pc18,
  mapping = aes(
    x = hour, y = Freq, group = severity,
    shape = severity, color = severity
  )
) +
  geom_line() +
  geom_point(
    mapping = aes(
      text = paste(
        "Hour: ", hour, "<br>",
        "Frequency: ", Freq, "<br>",
        "Severity: ", severity
      )
    ),
    data = day1234pc18,
    size = 2.5
  ) +
  labs(title = "2018 Hourly Frequency of Severity") +
  theme(axis.text.x = element_text(size = 10)) +
  scale_x_discrete(name = "Hour", breaks = labels, labels = labels) +
  scale_y_continuous(name = "Frequency")
## Warning: Ignoring unknown aesthetics: text
day1234pc18gg2
# Interactive version: hover shows only the custom `text` tooltip and the
# plotly mode bar is hidden. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
day1234pc18gp2 <- ggplotly(day1234pc18gg2, tooltip = "text") %>%
  config(displayModeBar = FALSE)
day1234pc18gp2
# City 2018: call counts with one row per city and one column per priority.
# Naming the arguments labels the two table dimensions directly.
data2pc18tb <- table(city = data2pc18$city, Priority = data2pc18$Priority)
head(data2pc18tb)
## Priority
## city 0 1 2 3 4
## ASHTON-SANDY SPRING 40 162 90 20 42
## BARNESVILLE 2 14 14 4 3
## BEALLSVILLE 7 15 17 2 3
## BETHESDA 1251 7485 3958 940 2920
## BOYDS 185 516 241 79 190
## BRINKLOW 13 46 21 6 9
# Merge with population: full outer join of the 2018 city x priority counts
# with the population table on their shared column(s) (presumably `city`;
# confirm against data3popc). `rate` is calls per 100 residents (2010
# census), rounded to one decimal.
data2318tb <- merge(x = data2pc18tb, y = data3popc, all = TRUE)
data2318tb$rate <- with(
  data2318tb,
  round(Freq / Census.2010 * 100, digits = 1)
)
head(data2318tb)
## city Priority Freq Census.2010 rate
## 1 ASHTON-SANDY SPRING 0 40 5628 0.7
## 2 ASHTON-SANDY SPRING 3 20 5628 0.4
## 3 ASHTON-SANDY SPRING 2 90 5628 1.6
## 4 ASHTON-SANDY SPRING 4 42 5628 0.7
## 5 ASHTON-SANDY SPRING 1 162 5628 2.9
## 6 BARNESVILLE 0 2 172 1.2
# Total 2018 calls per city (row sums of the city x priority table).
data2pc18sum <- rowSums(data2pc18tb)
head(data2pc18sum)
## ASHTON-SANDY SPRING BARNESVILLE BEALLSVILLE BETHESDA
## 354 37 44 16554
## BOYDS BRINKLOW
## 1211 95
summary(data2pc18sum)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 37 122 1970 8519 7024 71850
# Data-frame version with columns Total and city; the city names also stay
# as row names. The value column is renamed directly: assigning
# names(dimnames(<data.frame>)) does not change column names, so the earlier
# copy-then-drop workaround is not needed.
data2pc18sumdf <- as.data.frame(data2pc18sum)
names(data2pc18sumdf) <- "Total"
data2pc18sumdf$city <- row.names(data2pc18sumdf)
head(data2pc18sumdf)
## Total city
## ASHTON-SANDY SPRING 354 ASHTON-SANDY SPRING
## BARNESVILLE 37 BARNESVILLE
## BEALLSVILLE 44 BEALLSVILLE
## BETHESDA 16554 BETHESDA
## BOYDS 1211 BOYDS
## BRINKLOW 95 BRINKLOW
# Priority 2018: the same cross-tabulation with priority as rows and city as
# columns, labelling both dimensions when the table is built.
data2pc18tb2 <- table(
  data2pc18$Priority, data2pc18$city,
  dnn = c("Priority", "city")
)
head(data2pc18tb2)
## city
## Priority ASHTON-SANDY SPRING BARNESVILLE BEALLSVILLE BETHESDA BOYDS BRINKLOW
## 0 40 2 7 1251 185 13
## 1 162 14 15 7485 516 46
## 2 90 14 17 3958 241 21
## 3 20 4 2 940 79 6
## 4 42 3 3 2920 190 9
## city
## Priority CABIN JOHN CHEVY CHASE CLARKSBURG DAMASCUS DERWOOD DICKERSON
## 0 19 349 265 216 435 29
## 1 103 2429 1556 898 1616 164
## 2 40 1214 592 387 1090 120
## 3 34 248 241 176 339 51
## 4 56 1056 634 293 513 47
## city
## Priority GAITHERSBURG GARRETT PARK GERMANTOWN GLEN ECHO KENSINGTON
## 0 2944 5 2238 6 339
## 1 12965 48 8803 30 1671
## 2 6977 19 4465 17 713
## 3 2360 6 1624 4 279
## 4 4975 26 3072 8 623
## city
## Priority MONTGOMERY VILLAGE OLNEY POOLESVILLE POTOMAC ROCKVILLE SILVER SPRING
## 0 837 327 73 453 2774 7422
## 1 3218 1770 361 3888 15190 29210
## 2 1126 691 147 1402 7300 18529
## 3 681 342 71 441 2699 5116
## 4 1162 620 123 1098 6442 11573
## city
## Priority SPENCERVILLE WASHINGTON GROVE
## 0 19 3
## 1 58 24
## 2 25 6
## 3 6 4
## 4 14 12
# Total incidents per priority level: sum the priority-by-city table across
# cities.
data2pc18sum2 <- rowSums(data2pc18tb2)
head(data2pc18sum2)
## 0 1 2 3 4
## 20251 92240 49201 15773 35514
summary(data2pc18sum2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15773 20251 35514 42596 49201 92240
# Per-priority totals as a data frame with columns `Total` and `Severity`,
# keyed by priority level in the row names. The totals are taken straight from
# the named vector, so nothing relies on `$` partial matching of a column name,
# and the columns are named at construction time (`names(dimnames(df)) <- ...`
# cannot rename data-frame columns).
data2pc18sumdf2 <- data.frame(
  Total = unname(data2pc18sum2),
  Severity = names(data2pc18sum2),
  row.names = names(data2pc18sum2),
  stringsAsFactors = FALSE
)
head(data2pc18sumdf2)
## Total Severity
## 0 20251 0
## 1 92240 1
## 2 49201 2
## 3 15773 3
## 4 35514 4
# Convert the city-by-priority table to a long data frame and keep only the
# priority and count columns (the city column is dropped).
data2pc18df <- as.data.frame(data2pc18tb)[, c("Priority", "Freq")]
head(data2pc18df)
## Priority Freq
## 1 0 40
## 2 0 2
## 3 0 7
## 4 0 1251
## 5 0 185
## 6 0 13
# Show the five-number summary and outliers of the counts, one box per
# priority level.
pcbox18 <- ggplot(
  data2pc18df,
  aes(x = Priority, y = Freq, fill = Priority)
) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 2) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.9)) +
  ggtitle("2018 Crime Count of Severity") +
  xlab("Severity") +
  ylab("Count") +
  theme(legend.position = "right") +
  labs(fill = "Severity")
pcbox18
# Interactive copy with the plotly mode bar hidden. `FALSE` is spelled out
# because `F` is an ordinary variable that can be reassigned.
pcbox18gg <- ggplotly(pcbox18) %>% config(displayModeBar = FALSE)
pcbox18gg
# Show the five-number summary and outliers of the rate, one box per priority
# level.
head(data2318tb, 10)
## city Priority Freq Census.2010 rate
## 1 ASHTON-SANDY SPRING 0 40 5628 0.7
## 2 ASHTON-SANDY SPRING 3 20 5628 0.4
## 3 ASHTON-SANDY SPRING 2 90 5628 1.6
## 4 ASHTON-SANDY SPRING 4 42 5628 0.7
## 5 ASHTON-SANDY SPRING 1 162 5628 2.9
## 6 BARNESVILLE 0 2 172 1.2
## 7 BARNESVILLE 2 14 172 8.1
## 8 BARNESVILLE 4 3 172 1.7
## 9 BARNESVILLE 1 14 172 8.1
## 10 BARNESVILLE 3 4 172 2.3
data23box18 <- ggplot(
  data2318tb,
  aes(x = Priority, y = rate, fill = Priority)
) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 2) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.9)) +
  ggtitle("2018 Crime Rate by Severity") +
  xlab("Severity") +
  ylab("Rate") +
  theme(legend.position = "right") +
  labs(fill = "Severity")
data23box18
# Interactive copy with the plotly mode bar hidden. `FALSE` is spelled out
# because `F` is an ordinary variable that can be reassigned.
data23box18gg <- ggplotly(data23box18) %>% config(displayModeBar = FALSE)
data23box18gg
# 2019 whole year: keep only the records whose `datetime` falls in 2019.
data2pc19 <- data2pc %>%
  filter(year(datetime) == 2019)
head(data2pc19)
## End.Time Priority Close.Type city
## 1 9/14/2019 14:29 0 TRAFFIC/TRANSPORTATION INCIDENT GERMANTOWN
## 2 10/22/2019 8:25 2 TRAFFIC/TRANSPORTATION INCIDENT SILVER SPRING
## 3 9/8/2019 3:47 2 ASSIST OTHER AGENCY DAMASCUS
## 4 3/21/2019 5:59 0 URGENT ASSIST ROCKVILLE
## 5 8/14/2019 19:59 2 TRAFFIC VIOLATION ROCKVILLE
## 6 2/24/2019 0:56 2 TRAFFIC/TRANSPORTATION INCIDENT BETHESDA
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 5D 99 118 347
## 2 3D 92 282 627
## 3 5D 0 77 604
## 4 4D 0 44 461
## 5 1D 120 575 788
## 6 2D 402 1354 1513
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 4296 18 228 3949
## 2 1801 189 345 1174
## 3 628 77 527 24
## 4 35782 44 416 35321
## 5 1227 455 212 439
## 6 4237 952 159 2723
## datetime time date
## 1 2019-09-14 14:29:00 14:29:00 09/14/2019
## 2 2019-10-22 08:25:00 08:25:00 10/22/2019
## 3 2019-09-08 03:47:00 03:47:00 09/08/2019
## 4 2019-03-21 05:59:00 05:59:00 03/21/2019
## 5 2019-08-14 19:59:00 19:59:00 08/14/2019
## 6 2019-02-24 00:56:00 00:56:00 02/24/2019
# Reduce the 2019 records to two columns: numeric priority and hour of day.
# `datetime` is carried along only long enough to derive `hour`.
hmdfpc19 <- data2pc19[c("Priority", "datetime")]
hmdfpc19$hour <- hour(hmdfpc19$datetime)
hmdfpc19$datetime <- NULL
hmdfpc19$Priority <- as.numeric(hmdfpc19$Priority)
head(hmdfpc19)
## Priority hour
## 1 0 14
## 2 2 8
## 3 2 3
## 4 0 5
## 5 2 19
## 6 2 0
# Priority (rows) by hour (columns) counts. The dimensions are left unnamed,
# so a later as.data.frame() yields the default Var1/Var2 column names.
tbpc19 <- table(hmdfpc19[["Priority"]], hmdfpc19[["hour"]])
head(tbpc19)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 0 984 755 598 555 413 331 313 352 465 607 676 695 796 849 809
## 1 3500 2745 2212 1816 1573 1367 1280 2202 2908 3476 3796 4029 4251 4290 4152
## 2 1279 903 712 565 522 481 567 1102 1707 1982 2024 2158 2451 2805 2690
## 3 1093 765 505 347 252 152 104 241 394 410 537 480 504 647 588
## 4 583 362 248 169 193 181 228 878 1488 2066 2533 2560 2499 2587 2444
##
## 15 16 17 18 19 20 21 22 23
## 0 1124 1074 1141 1189 1241 1077 1166 1167 1043
## 1 4176 5319 5000 4972 4876 4902 4978 5092 4071
## 2 2776 3842 3469 3332 3004 2455 2213 2094 1617
## 3 577 725 758 737 751 763 1132 1664 1446
## 4 2167 2998 2719 2190 2031 1795 1591 1443 825
# Incident counts per hour of day, all priorities combined.
hourtable <- table(hmdfpc19[["hour"]])
# Kernel density estimate of the hour values. NOTE: the result is stored under
# the name `density`, the same name as the function; later density(...) calls
# still work because R skips non-function bindings when looking up a call.
density <- density(hmdfpc19[["hour"]])
barplot(tbpc19)
# Long format: one row per (severity, hour) pair with its count in `Freq`.
tbpc19df <- as.data.frame(tbpc19)
names(tbpc19df)[names(tbpc19df) %in% c("Var1", "Var2")] <- c("severity", "hour")
head(tbpc19df)
## severity hour Freq
## 1 0 0 984
## 2 1 0 3500
## 3 2 0 1279
## 4 3 0 1093
## 5 4 0 583
## 6 0 1 755
# Heat map of counts: hour on x, severity on y, white-to-red fill, with each
# count printed on its tile.
htpc19 <- ggplot(tbpc19df, aes(x = hour, y = severity, fill = Freq)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "red") +
  geom_text(aes(label = Freq), size = 3) +
  labs(title = "2019 Hourly Crime per Severity (0 = most dangerous)")
htpc19
# Split each day into four six-hour windows.
# 2019 Day1: 00:00 to 05:59
data2pc19td1 <- data2pc19 %>%
  filter(hour(datetime) >= 0, hour(datetime) < 6)
head(data2pc19td1)
## End.Time Priority Close.Type city
## 1 9/8/2019 3:47 2 ASSIST OTHER AGENCY DAMASCUS
## 2 3/21/2019 5:59 0 URGENT ASSIST ROCKVILLE
## 3 2/24/2019 0:56 2 TRAFFIC/TRANSPORTATION INCIDENT BETHESDA
## 4 10/18/2019 0:56 1 CHECK WELFARE GAITHERSBURG
## 5 11/24/2019 2:48 1 ASSAULT JUST OCCURRED - ROUTINE GAITHERSBURG
## 6 5/17/2019 1:14 0 DOMESTIC DISTURBANCE/VIOLENCE SILVER SPRING
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 5D 0 77 604
## 2 4D 0 44 461
## 3 2D 402 1354 1513
## 4 6D 646 811 1121
## 5 6D 401 537 899
## 6 3D 42 99 208
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 628 77 527 24
## 2 35782 44 416 35321
## 3 4237 952 159 2723
## 4 1496 164 310 375
## 5 2464 136 361 1565
## 6 9544 56 108 9336
## datetime time date
## 1 2019-09-08 03:47:00 03:47:00 09/08/2019
## 2 2019-03-21 05:59:00 05:59:00 03/21/2019
## 3 2019-02-24 00:56:00 00:56:00 02/24/2019
## 4 2019-10-18 00:56:00 00:56:00 10/18/2019
## 5 2019-11-24 02:48:00 02:48:00 11/24/2019
## 6 2019-05-17 01:14:00 01:14:00 05/17/2019
# Window 1 (00:00-05:59): reduce to numeric priority and hour of day.
day1pc19 <- data2pc19td1[c("Priority", "datetime")]
day1pc19$hour <- hour(day1pc19$datetime)
day1pc19$datetime <- NULL
day1pc19$Priority <- as.numeric(day1pc19$Priority)
head(day1pc19)
## Priority hour
## 1 2 3
## 2 0 5
## 3 2 0
## 4 1 0
## 5 1 2
## 6 0 1
# Priority (rows) by hour (columns) counts for this window.
d1pc19table <- table(day1pc19[["Priority"]], day1pc19[["hour"]])
head(d1pc19table)
##
## 0 1 2 3 4 5
## 0 984 755 598 555 413 331
## 1 3500 2745 2212 1816 1573 1367
## 2 1279 903 712 565 522 481
## 3 1093 765 505 347 252 152
## 4 583 362 248 169 193 181
d1pc19hrtable <- table(day1pc19[["hour"]])
d1pc19density <- density(day1pc19[["hour"]])
barplot(d1pc19table)
# Long format: the default Var1/Var2 columns become severity/hour.
d1pc19tbdf <- as.data.frame(d1pc19table)
names(d1pc19tbdf)[names(d1pc19tbdf) %in% c("Var1", "Var2")] <-
  c("severity", "hour")
head(d1pc19tbdf)
## severity hour Freq
## 1 0 0 984
## 2 1 0 3500
## 3 2 0 1279
## 4 3 0 1093
## 5 4 0 583
## 6 0 1 755
# 2019 Day2: 06:00 to 11:59
data2pc19td2 <- data2pc19 %>%
  filter(hour(datetime) >= 6, hour(datetime) < 12)
head(data2pc19td2)
## End.Time Priority
## 1 10/22/2019 8:25 2
## 2 5/18/2019 8:22 1
## 3 5/30/2019 10:09 4
## 4 5/30/2019 11:19 4
## 5 5/15/2019 11:19 1
## 6 5/15/2019 10:13 4
## Close.Type city
## 1 TRAFFIC/TRANSPORTATION INCIDENT SILVER SPRING
## 2 CHECK WELFARE SILVER SPRING
## 3 THEFT/LARCENY - OCCURRED EARLIER ROCKVILLE
## 4 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT ROCKVILLE
## 5 SUSPICIOUS CIRC, PERSONS, VEHICLE SILVER SPRING
## 6 TRAFFIC/TRANSPORTATION INCIDENT - OCCURRED EARLIER GERMANTOWN
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 3D 92 282 627
## 2 3D 416 756 1204
## 3 1D 215 365 1429
## 4 6D 139 350 NA
## 5 3D 60 326 484
## 6 5D 221 404 1276
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 1801 189 345 1174
## 2 2817 339 448 1612
## 3 4260 150 1064 2830
## 4 2182 211 NA NA
## 5 5468 265 158 4984
## 6 3241 182 871 1964
## datetime time date
## 1 2019-10-22 08:25:00 08:25:00 10/22/2019
## 2 2019-05-18 08:22:00 08:22:00 05/18/2019
## 3 2019-05-30 10:09:00 10:09:00 05/30/2019
## 4 2019-05-30 11:19:00 11:19:00 05/30/2019
## 5 2019-05-15 11:19:00 11:19:00 05/15/2019
## 6 2019-05-15 10:13:00 10:13:00 05/15/2019
# Window 2 (06:00-11:59): reduce to numeric priority and hour of day.
day2pc19 <- data2pc19td2[c("Priority", "datetime")]
day2pc19$hour <- hour(day2pc19$datetime)
day2pc19$datetime <- NULL
day2pc19$Priority <- as.numeric(day2pc19$Priority)
head(day2pc19)
## Priority hour
## 1 2 8
## 2 1 8
## 3 4 10
## 4 4 11
## 5 1 11
## 6 4 10
# Priority (rows) by hour (columns) counts for this window.
d2pc19table <- table(day2pc19[["Priority"]], day2pc19[["hour"]])
head(d2pc19table)
##
## 6 7 8 9 10 11
## 0 313 352 465 607 676 695
## 1 1280 2202 2908 3476 3796 4029
## 2 567 1102 1707 1982 2024 2158
## 3 104 241 394 410 537 480
## 4 228 878 1488 2066 2533 2560
d2pc19hrtable <- table(day2pc19[["hour"]])
d2pc19density <- density(day2pc19[["hour"]])
barplot(d2pc19table)
# Long format: the default Var1/Var2 columns become severity/hour.
d2pc19tbdf <- as.data.frame(d2pc19table)
names(d2pc19tbdf)[names(d2pc19tbdf) %in% c("Var1", "Var2")] <-
  c("severity", "hour")
head(d2pc19tbdf)
## severity hour Freq
## 1 0 6 313
## 2 1 6 1280
## 3 2 6 567
## 4 3 6 104
## 5 4 6 228
## 6 0 7 352
# 2019 Day3: 12:00 to 17:59
data2pc19tn1 <- data2pc19 %>%
  filter(hour(datetime) >= 12, hour(datetime) < 18)
head(data2pc19tn1)
## End.Time Priority Close.Type
## 1 9/14/2019 14:29 0 TRAFFIC/TRANSPORTATION INCIDENT
## 2 9/29/2019 15:21 3 ROBBERY - OCCURRED EARLIER
## 3 5/8/2019 15:13 2 THEFT/LARCENY
## 4 1/14/2019 13:42 4 THEFT/LARCENY - OCCURRED EARLIER
## 5 7/31/2019 15:37 4 VANDALISM, DAMAGE, MISCHIEF - OCCURRED EARLIER
## 6 10/21/2019 15:08 4 FRAUD/DECEPTION - OCCURRED EARLIER
## city Police.District.Number CallTime.CallRoute Calltime.Dispatch
## 1 GERMANTOWN 5D 99 118
## 2 SILVER SPRING 3D 96 632
## 3 GERMANTOWN 5D 63 123
## 4 SILVER SPRING 3D 385 1582
## 5 KENSINGTON 4D 214 2728
## 6 BOYDS 5D 337 590
## Calltime.Arrive Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive
## 1 347 4296 18 228
## 2 754 11365 536 121
## 3 790 4847 60 666
## 4 2087 4768 1197 505
## 5 2944 3684 2514 215
## 6 939 2611 252 348
## Arrive.Cleared datetime time date
## 1 3949 2019-09-14 14:29:00 14:29:00 09/14/2019
## 2 10610 2019-09-29 15:21:00 15:21:00 09/29/2019
## 3 4057 2019-05-08 15:13:00 15:13:00 05/08/2019
## 4 2680 2019-01-14 13:42:00 13:42:00 01/14/2019
## 5 740 2019-07-31 15:37:00 15:37:00 07/31/2019
## 6 1672 2019-10-21 15:08:00 15:08:00 10/21/2019
# Window 3 (12:00-17:59): reduce to numeric priority and hour of day.
day3pc19 <- data2pc19tn1[c("Priority", "datetime")]
day3pc19$hour <- hour(day3pc19$datetime)
day3pc19$datetime <- NULL
day3pc19$Priority <- as.numeric(day3pc19$Priority)
head(day3pc19)
## Priority hour
## 1 0 14
## 2 3 15
## 3 2 15
## 4 4 13
## 5 4 15
## 6 4 15
# Priority (rows) by hour (columns) counts for this window.
d3pc19table <- table(day3pc19[["Priority"]], day3pc19[["hour"]])
head(d3pc19table)
##
## 12 13 14 15 16 17
## 0 796 849 809 1124 1074 1141
## 1 4251 4290 4152 4176 5319 5000
## 2 2451 2805 2690 2776 3842 3469
## 3 504 647 588 577 725 758
## 4 2499 2587 2444 2167 2998 2719
d3pc19hrtable <- table(day3pc19[["hour"]])
d3pc19density <- density(day3pc19[["hour"]])
barplot(d3pc19table)
# Long format: the default Var1/Var2 columns become severity/hour.
d3pc19tbdf <- as.data.frame(d3pc19table)
names(d3pc19tbdf)[names(d3pc19tbdf) %in% c("Var1", "Var2")] <-
  c("severity", "hour")
head(d3pc19tbdf)
## severity hour Freq
## 1 0 12 796
## 2 1 12 4251
## 3 2 12 2451
## 4 3 12 504
## 5 4 12 2499
## 6 0 13 849
# 2019 Day4: 18:00 to 23:59
data2pc19tn2 <- data2pc19 %>%
  filter(hour(datetime) >= 18, hour(datetime) < 24)
head(data2pc19tn2)
## End.Time Priority
## 1 8/14/2019 19:59 2
## 2 4/9/2019 19:26 4
## 3 5/7/2019 21:11 4
## 4 5/8/2019 23:37 1
## 5 5/30/2019 18:01 4
## 6 5/15/2019 20:29 2
## Close.Type city
## 1 TRAFFIC VIOLATION ROCKVILLE
## 2 VANDALISM, DAMAGE, MISCHIEF - OCCURRED EARLIER GERMANTOWN
## 3 THEFTT - TRS THEFT/LARCENY - TELEPHONE REPORTING UNIT GAITHERSBURG
## 4 MISSING, RUNAWAY, FOUND PERSON GAITHERSBURG
## 5 THEFT/LARCENY - OCCURRED EARLIER ROCKVILLE
## 6 THEFT/LARCENY - HOLDING SUSPECT SILVER SPRING
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 1D 120 575 788
## 2 5D 182 297 1318
## 3 6D 184 612 NA
## 4 6D 245 930 1667
## 5 2D 308 714 1270
## 6 3D 65 103 5750
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 1227 455 212 439
## 2 4769 114 1020 3450
## 3 1313 427 NA NA
## 4 9964 685 737 8296
## 5 2702 406 555 1432
## 6 7455 38 5646 1705
## datetime time date
## 1 2019-08-14 19:59:00 19:59:00 08/14/2019
## 2 2019-04-09 19:26:00 19:26:00 04/09/2019
## 3 2019-05-07 21:11:00 21:11:00 05/07/2019
## 4 2019-05-08 23:37:00 23:37:00 05/08/2019
## 5 2019-05-30 18:01:00 18:01:00 05/30/2019
## 6 2019-05-15 20:29:00 20:29:00 05/15/2019
# Window 4 (18:00-23:59): reduce to numeric priority and hour of day.
day4pc19 <- data2pc19tn2[c("Priority", "datetime")]
day4pc19$hour <- hour(day4pc19$datetime)
day4pc19$datetime <- NULL
day4pc19$Priority <- as.numeric(day4pc19$Priority)
head(day4pc19)
## Priority hour
## 1 2 19
## 2 4 19
## 3 4 21
## 4 1 23
## 5 4 18
## 6 2 20
# Priority (rows) by hour (columns) counts for this window.
d4pc19table <- table(day4pc19[["Priority"]], day4pc19[["hour"]])
head(d4pc19table)
##
## 18 19 20 21 22 23
## 0 1189 1241 1077 1166 1167 1043
## 1 4972 4876 4902 4978 5092 4071
## 2 3332 3004 2455 2213 2094 1617
## 3 737 751 763 1132 1664 1446
## 4 2190 2031 1795 1591 1443 825
d4pc19hrtable <- table(day4pc19[["hour"]])
d4pc19density <- density(day4pc19[["hour"]])
barplot(d4pc19table)
# Long format: the default Var1/Var2 columns become severity/hour.
d4pc19tbdf <- as.data.frame(d4pc19table)
names(d4pc19tbdf)[names(d4pc19tbdf) %in% c("Var1", "Var2")] <-
  c("severity", "hour")
head(d4pc19tbdf)
## severity hour Freq
## 1 0 18 1189
## 2 1 18 4972
## 3 2 18 3332
## 4 3 18 737
## 5 4 18 2190
## 6 0 19 1241
# Stack the four six-hour windows into one long table covering all 24 hours.
day1234pc19 <- rbind(d1pc19tbdf, d2pc19tbdf, d3pc19tbdf, d4pc19tbdf)
head(day1234pc19)
## severity hour Freq
## 1 0 0 984
## 2 1 0 3500
## 3 2 0 1279
## 4 3 0 1093
## 5 4 0 583
## 6 0 1 755
# Line chart of hourly counts, one facet panel per severity level.
day1234pc19gg <- ggplot(
  day1234pc19,
  aes(
    x = hour, y = Freq, group = severity, shape = severity,
    color = severity
  )
) +
  geom_line() +
  # The tooltip text refers to columns by bare name. Writing
  # `day1234pc19$col` inside aes() bypasses ggplot2's data masking, raises
  # "Use of `$` is discouraged" warnings, and can misalign values once the
  # data are split into facets.
  geom_point(
    aes(
      text = paste(
        paste("Hour: ", hour, "<br>"),
        paste("Frequency: ", Freq, "<br>"),
        paste("Severity: ", severity)
      )
    ),
    size = 2,
    data = day1234pc19
  ) +
  ggtitle("2019 Hourly Frequency of Severity") +
  theme(axis.text.x = element_text(size = 5)) +
  scale_x_discrete("Hour", breaks = labels, labels = labels) +
  scale_y_continuous("Frequency") +
  facet_grid(. ~ severity)
## Warning: Ignoring unknown aesthetics: text
day1234pc19gg
# Interactive copy: the tooltip shows only the custom `text` aesthetic and the
# plotly mode bar is hidden. `FALSE` is spelled out because `F` is an ordinary
# variable that can be reassigned.
day1234pc19gp <- ggplotly(day1234pc19gg, tooltip = "text") %>%
  config(displayModeBar = FALSE)
day1234pc19gp
# Without animation: static line chart with all severities in a single panel.
day1234pc19gg2 <- ggplot(
  data = day1234pc19,
  mapping = aes(
    x = hour, y = Freq,
    group = severity, shape = severity, colour = severity
  )
) +
  geom_line() +
  geom_point(
    mapping = aes(
      text = paste(
        paste("Hour: ", hour, "<br>"),
        paste("Frequency: ", Freq, "<br>"),
        paste("Severity: ", severity)
      )
    ),
    data = day1234pc19,
    size = 2.5
  ) +
  labs(title = "2019 Hourly Frequency of Severity") +
  theme(axis.text.x = element_text(size = 10)) +
  scale_x_discrete(name = "Hour", breaks = labels, labels = labels) +
  scale_y_continuous(name = "Frequency")
## Warning: Ignoring unknown aesthetics: text
day1234pc19gg2
##with animation but it won't be used
#day1234pc19gg22 <- ggplot(day1234pc19,
# aes(x = hour, y = Freq, group = severity,
# shape = severity, color = severity)) +
# geom_line() +
# geom_point(aes(text =
# paste(paste("Hour: ", hour, "<br>"),
# paste("Frequency: ", Freq, "<br>"),
# paste("Severity: ", severity))),
# size = 1,
# data = day1234pc19
# ) +
# ggtitle("2019 Hourly Frequency of Severity") +
# theme(axis.text.x = element_text(size = 5)) +
# scale_x_discrete("Hour", breaks = labels, labels = labels) +
# scale_y_continuous("Frequency") +
# transition_reveal(as.numeric(hour))
#
#day1234pc19gg22
#
#day1234pc19gp22 <- ggplotly(day1234pc19gg2, tooltip = "text") %>%
# config(displayModeBar = F)
#day1234pc19gp22
# City 2019: cross-tabulate incidents by city (rows) and priority (columns),
# labelling both dimensions when the table is built.
data2pc19tb <- table(
  data2pc19$city, data2pc19$Priority,
  dnn = c("city", "Priority")
)
head(data2pc19tb)
## Priority
## city 0 1 2 3 4
## ASHTON-SANDY SPRING 34 180 113 16 51
## BARNESVILLE 2 19 14 4 3
## BEALLSVILLE 2 18 22 7 5
## BETHESDA 1103 6634 3891 879 2944
## BOYDS 164 477 272 82 165
## BRINKLOW 8 49 26 2 12
# Merge with population: outer-join the city/priority counts with the census
# table on `city`, then express each count as a rate per 100 residents.
data2319tb <- merge(data2pc19tb, data3popc, by = "city", all = TRUE)
data2319tb$rate <- with(
  data2319tb,
  round(Freq / Census.2010 * 100, digits = 1)
)
head(data2319tb)
## city Priority Freq Census.2010 rate
## 1 ASHTON-SANDY SPRING 0 34 5628 0.6
## 2 ASHTON-SANDY SPRING 3 16 5628 0.3
## 3 ASHTON-SANDY SPRING 2 113 5628 2.0
## 4 ASHTON-SANDY SPRING 4 51 5628 0.9
## 5 ASHTON-SANDY SPRING 1 180 5628 3.2
## 6 BARNESVILLE 0 2 172 1.2
# Total incidents per city: sum the city-by-priority table across priorities.
data2pc19sum <- rowSums(data2pc19tb)
data2pc19sum
## ASHTON-SANDY SPRING BARNESVILLE BEALLSVILLE BETHESDA
## 394 42 54 15451
## BOYDS BRINKLOW CABIN JOHN CHEVY CHASE
## 1160 97 225 5295
## CLARKSBURG DAMASCUS DERWOOD DICKERSON
## 3370 1917 3750 367
## GAITHERSBURG GARRETT PARK GERMANTOWN GLEN ECHO
## 29562 116 18236 62
## KENSINGTON MONTGOMERY VILLAGE OLNEY POOLESVILLE
## 3669 6955 3485 758
## POTOMAC ROCKVILLE SILVER SPRING SPENCERVILLE
## 7189 33719 69440 126
## WASHINGTON GROVE
## 64
summary(data2pc19sum)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 42 126 1917 8220 6955 69440
# Per-city totals as a data frame with columns `Total` and `city`, keyed by
# city in the row names. Columns are named at construction time because
# `names(dimnames(df)) <- ...` cannot rename the columns of a data frame.
data2pc19sumdf <- data.frame(
  Total = unname(data2pc19sum),
  city = names(data2pc19sum),
  row.names = names(data2pc19sum),
  stringsAsFactors = FALSE
)
head(data2pc19sumdf)
## Total city
## ASHTON-SANDY SPRING 394 ASHTON-SANDY SPRING
## BARNESVILLE 42 BARNESVILLE
## BEALLSVILLE 54 BEALLSVILLE
## BETHESDA 15451 BETHESDA
## BOYDS 1160 BOYDS
## BRINKLOW 97 BRINKLOW
# Priority 2019: the same cross-tabulation with priority as rows and city as
# columns, labelling both dimensions when the table is built.
data2pc19tb2 <- table(
  data2pc19$Priority, data2pc19$city,
  dnn = c("Priority", "city")
)
head(data2pc19tb2)
## city
## Priority ASHTON-SANDY SPRING BARNESVILLE BEALLSVILLE BETHESDA BOYDS BRINKLOW
## 0 34 2 2 1103 164 8
## 1 180 19 18 6634 477 49
## 2 113 14 22 3891 272 26
## 3 16 4 7 879 82 2
## 4 51 3 5 2944 165 12
## city
## Priority CABIN JOHN CHEVY CHASE CLARKSBURG DAMASCUS DERWOOD DICKERSON
## 0 13 360 282 213 409 37
## 1 100 2385 1560 859 1560 129
## 2 40 1282 569 356 906 126
## 3 17 283 265 149 274 33
## 4 55 985 694 340 601 42
## city
## Priority GAITHERSBURG GARRETT PARK GERMANTOWN GLEN ECHO KENSINGTON
## 0 2980 9 2076 6 291
## 1 12604 55 7990 19 1658
## 2 6571 19 3843 16 753
## 3 2389 12 1685 0 229
## 4 5018 21 2642 21 738
## city
## Priority MONTGOMERY VILLAGE OLNEY POOLESVILLE POTOMAC ROCKVILLE SILVER SPRING
## 0 815 309 74 416 2738 7049
## 1 3129 1597 353 3909 14336 27293
## 2 1042 687 136 1329 6884 17809
## 3 731 253 62 367 2808 5008
## 4 1238 639 133 1168 6953 12281
## city
## Priority SPENCERVILLE WASHINGTON GROVE
## 0 23 7
## 1 40 30
## 2 36 8
## 3 8 9
## 4 19 10
# Total incidents per priority level: sum the priority-by-city table across
# cities.
data2pc19sum2 <- rowSums(data2pc19tb2)
data2pc19sum2
## 0 1 2 3 4
## 19420 86983 46750 15572 36778
summary(data2pc19sum2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15572 19420 36778 41101 46750 86983
# Per-priority totals as a data frame with columns `Total` and `Severity`,
# keyed by priority level in the row names. The totals are taken straight from
# the named vector, so nothing relies on `$` partial matching of a column name,
# and the columns are named at construction time (`names(dimnames(df)) <- ...`
# cannot rename data-frame columns).
data2pc19sumdf2 <- data.frame(
  Total = unname(data2pc19sum2),
  Severity = names(data2pc19sum2),
  row.names = names(data2pc19sum2),
  stringsAsFactors = FALSE
)
head(data2pc19sumdf2)
## Total Severity
## 0 19420 0
## 1 86983 1
## 2 46750 2
## 3 15572 3
## 4 36778 4
# Convert the city-by-priority table to a long data frame and keep only the
# priority and count columns (the city column is dropped).
data2pc19df <- as.data.frame(data2pc19tb)[, c("Priority", "Freq")]
head(data2pc19df)
## Priority Freq
## 1 0 34
## 2 0 2
## 3 0 2
## 4 0 1103
## 5 0 164
## 6 0 8
# Show the five-number summary and outliers of the counts (frequency), one box
# per priority level.
pcbox19 <- ggplot(
  data2pc19df,
  aes(x = Priority, y = Freq, fill = Priority)
) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 2) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.9)) +
  ggtitle("2019 Crime Count of Severity") +
  xlab("Severity") +
  ylab("Count") +
  theme(legend.position = "right") +
  labs(fill = "Severity")
pcbox19
# Interactive copy with the plotly mode bar hidden. `FALSE` is spelled out
# because `F` is an ordinary variable that can be reassigned.
pcbox19gg <- ggplotly(pcbox19) %>% config(displayModeBar = FALSE)
pcbox19gg
# Show the five-number summary and outliers of the rate, one box per priority
# level.
head(data2319tb, 10)
## city Priority Freq Census.2010 rate
## 1 ASHTON-SANDY SPRING 0 34 5628 0.6
## 2 ASHTON-SANDY SPRING 3 16 5628 0.3
## 3 ASHTON-SANDY SPRING 2 113 5628 2.0
## 4 ASHTON-SANDY SPRING 4 51 5628 0.9
## 5 ASHTON-SANDY SPRING 1 180 5628 3.2
## 6 BARNESVILLE 0 2 172 1.2
## 7 BARNESVILLE 2 14 172 8.1
## 8 BARNESVILLE 4 3 172 1.7
## 9 BARNESVILLE 1 19 172 11.0
## 10 BARNESVILLE 3 4 172 2.3
data23box19 <- ggplot(
  data2319tb,
  aes(x = Priority, y = rate, fill = Priority)
) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 2) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.9)) +
  ggtitle("2019 Crime Rate by Severity") +
  xlab("Severity") +
  ylab("Rate") +
  theme(legend.position = "right") +
  labs(fill = "Severity")
data23box19
# Interactive copy with the plotly mode bar hidden. `FALSE` is spelled out
# because `F` is an ordinary variable that can be reassigned.
data23box19gg <- ggplotly(data23box19) %>% config(displayModeBar = FALSE)
data23box19gg
# Preview the wide house-price table: one row per city, one column per month.
head(data1hc, n = 6L)
## city 2017-01 2017-02 2017-03 2017-04 2017-05 2017-06 2017-07 2017-08
## 1 SILVER SPRING 390620 392487 395550 396275 396810 397301 398409 399917
## 2 GERMANTOWN 291883 293267 295705 296331 296689 297053 298068 299153
## 3 BETHESDA 896903 898448 903592 904371 905178 905056 905350 905851
## 4 GAITHERSBURG 368529 370345 373394 374416 375105 375409 376512 377779
## 5 ROCKVILLE 501038 503002 506782 507513 508353 508382 509133 509784
## 6 POTOMAC 951291 953254 959322 960167 961027 960982 962343 964478
## 2017-09 2017-10 2017-11 2017-12 2018-01 2018-02 2018-03 2018-04 2018-05
## 1 400649 401799 403398 404867 403991 405137 406518 409937 411555
## 2 299483 299749 300672 301461 300785 301398 302329 304773 305909
## 3 904372 906099 908490 912808 911467 915507 918452 924509 925451
## 4 378432 378835 379742 380717 379476 379885 380437 383072 383925
## 5 509574 509880 510939 511967 510069 510892 512176 516266 517852
## 6 964886 966953 969420 971791 968505 970046 970918 976201 977724
## 2018-06 2018-07 2018-08 2018-09 2018-10 2018-11 2018-12 2019-01 2019-02
## 1 413016 414837 416193 417308 417280 417085 418167 421278 422691
## 2 306714 307552 308227 308794 308906 308767 309579 311669 312509
## 3 926120 928273 929363 930919 929842 929418 929906 935502 934825
## 4 384539 385236 385666 386160 386305 386470 387711 390667 391811
## 5 519130 520562 521655 522614 522285 521718 522561 526000 526753
## 6 978552 979232 978567 978776 976553 974526 975783 982006 983097
## 2019-03 2019-04 2019-05 2019-06 2019-07 2019-08 2019-09 2019-10 2019-11
## 1 422709 421560 422156 422926 423173 423568 424269 425641 427091
## 2 312226 311086 311325 311863 312191 312607 313284 314321 315430
## 3 932861 928090 928897 930650 931695 933655 935748 938883 941983
## 4 391679 390188 390592 391444 391945 392552 393065 393931 394714
## 5 525500 522429 521891 521992 521660 521549 521675 522626 523679
## 6 980440 975437 974412 974122 973131 972740 972136 972543 972689
## 2019-12
## 1 427807
## 2 316055
## 3 944549
## 4 394826
## 5 523907
## 6 970608
# Reshape the wide house-price table to long form: one row per city and month,
# with the month label in `year` and the price in `value`.
# The identifier column is passed through `id.vars`; melt() has no `city.var`
# argument, and without `id.vars` it guesses the id columns and prints a
# "Using ... as id variables" message.
data1hyr <- melt(
  data1hc,
  id.vars = "city",
  variable.name = "year"
)
# `year` holds "YYYY-MM" labels; appending "-01" makes them parse as
# first-of-month dates.
data1hyr$date <- as.Date(paste0(data1hyr$year, "-01"))
head(data1hyr)
## city year value date
## 1 SILVER SPRING 2017-01 390620 2017-01-01
## 2 GERMANTOWN 2017-01 291883 2017-01-01
## 3 BETHESDA 2017-01 896903 2017-01-01
## 4 GAITHERSBURG 2017-01 368529 2017-01-01
## 5 ROCKVILLE 2017-01 501038 2017-01-01
## 6 POTOMAC 2017-01 951291 2017-01-01
#### Median House Price 2017
# All monthly observations dated in 2017.
data1ht17 <- filter(data1hyr, year(date) == 2017)
head(data1ht17)
## city year value date
## 1 SILVER SPRING 2017-01 390620 2017-01-01
## 2 GERMANTOWN 2017-01 291883 2017-01-01
## 3 BETHESDA 2017-01 896903 2017-01-01
## 4 GAITHERSBURG 2017-01 368529 2017-01-01
## 5 ROCKVILLE 2017-01 501038 2017-01-01
## 6 POTOMAC 2017-01 951291 2017-01-01
# December 2017 housing prices only.
data1ht17dec <- filter(data1hyr, year == "2017-12")
head(data1ht17dec)
## city year value date
## 1 SILVER SPRING 2017-12 404867 2017-12-01
## 2 GERMANTOWN 2017-12 301461 2017-12-01
## 3 BETHESDA 2017-12 912808 2017-12-01
## 4 GAITHERSBURG 2017-12 380717 2017-12-01
## 5 ROCKVILLE 2017-12 511967 2017-12-01
## 6 POTOMAC 2017-12 971791 2017-12-01
# One coloured line per city across the months of 2017; the points carry the
# tooltip text used by the interactive version.
data1h17ln <- ggplot(
  data1ht17,
  aes(x = year, y = value, group = city)
) +
  geom_line(aes(color = city)) +
  geom_point(
    aes(
      color = city,
      text = paste(
        paste("City: ", city, "<br>"),
        paste("Year: ", year, "<br>"),
        paste("Value: ", value)
      )
    ),
    size = 2,
    data = data1ht17
  ) +
  theme(axis.text.x.bottom = element_text(angle = 70, hjust = 0.9)) +
  ggtitle("Median House Value 2017") +
  xlab("Year") +
  ylab("Value(USD$Dollar)") +
  theme(legend.position = "bottom")
## Warning: Ignoring unknown aesthetics: text
data1h17ln
# Interactive copy with the plotly mode bar hidden. `FALSE` is spelled out
# because `F` is an ordinary variable that can be reassigned.
data1h17lngg <- ggplotly(data1h17ln, tooltip = "text") %>%
  config(displayModeBar = FALSE)
data1h17lngg
#### Median House Price 2018
# All monthly observations dated in 2018.
data1ht18 <- filter(data1hyr, year(date) == 2018)
head(data1ht18)
## city year value date
## 1 SILVER SPRING 2018-01 403991 2018-01-01
## 2 GERMANTOWN 2018-01 300785 2018-01-01
## 3 BETHESDA 2018-01 911467 2018-01-01
## 4 GAITHERSBURG 2018-01 379476 2018-01-01
## 5 ROCKVILLE 2018-01 510069 2018-01-01
## 6 POTOMAC 2018-01 968505 2018-01-01
# December 2018 housing prices only.
data1ht18dec <- filter(data1hyr, year == "2018-12")
head(data1ht18dec)
## city year value date
## 1 SILVER SPRING 2018-12 418167 2018-12-01
## 2 GERMANTOWN 2018-12 309579 2018-12-01
## 3 BETHESDA 2018-12 929906 2018-12-01
## 4 GAITHERSBURG 2018-12 387711 2018-12-01
## 5 ROCKVILLE 2018-12 522561 2018-12-01
## 6 POTOMAC 2018-12 975783 2018-12-01
# One coloured line per city across the months of 2018; the points carry the
# tooltip text used by the interactive version.
data1h18ln <- ggplot(
  data1ht18,
  aes(x = year, y = value, group = city)
) +
  geom_line(aes(color = city)) +
  geom_point(
    aes(
      color = city,
      text = paste(
        paste("City: ", city, "<br>"),
        paste("Year: ", year, "<br>"),
        paste("Value: ", value)
      )
    ),
    size = 2,
    data = data1ht18
  ) +
  theme(axis.text.x.bottom = element_text(angle = 70, hjust = 0.9)) +
  ggtitle("Median House Value 2018") +
  xlab("Year") +
  ylab("Value(USD$Dollar)") +
  theme(legend.position = "bottom")
## Warning: Ignoring unknown aesthetics: text
data1h18ln
# Interactive copy with the plotly mode bar hidden. `FALSE` is spelled out
# because `F` is an ordinary variable that can be reassigned.
data1h18lngg <- ggplotly(data1h18ln, tooltip = "text") %>%
  config(displayModeBar = FALSE)
data1h18lngg
#### Median House Price 2019
# All monthly observations dated in 2019.
data1ht19 <- filter(data1hyr, year(date) == 2019)
head(data1ht19)
## city year value date
## 1 SILVER SPRING 2019-01 421278 2019-01-01
## 2 GERMANTOWN 2019-01 311669 2019-01-01
## 3 BETHESDA 2019-01 935502 2019-01-01
## 4 GAITHERSBURG 2019-01 390667 2019-01-01
## 5 ROCKVILLE 2019-01 526000 2019-01-01
## 6 POTOMAC 2019-01 982006 2019-01-01
# December 2019 housing prices only.
data1ht19dec <- filter(data1hyr, year == "2019-12")
head(data1ht19dec)
## city year value date
## 1 SILVER SPRING 2019-12 427807 2019-12-01
## 2 GERMANTOWN 2019-12 316055 2019-12-01
## 3 BETHESDA 2019-12 944549 2019-12-01
## 4 GAITHERSBURG 2019-12 394826 2019-12-01
## 5 ROCKVILLE 2019-12 523907 2019-12-01
## 6 POTOMAC 2019-12 970608 2019-12-01
# One coloured line per city across the months of 2019; the points carry the
# tooltip text used by the interactive version.
data1h19ln <- ggplot(
  data1ht19,
  aes(x = year, y = value, group = city)
) +
  geom_line(aes(color = city)) +
  geom_point(
    aes(
      color = city,
      text = paste(
        paste("City: ", city, "<br>"),
        paste("Year: ", year, "<br>"),
        paste("Value: ", value)
      )
    ),
    size = 2,
    data = data1ht19
  ) +
  theme(axis.text.x.bottom = element_text(angle = 70, hjust = 0.9)) +
  ggtitle("Median House Value 2019") +
  xlab("Year") +
  ylab("Value(USD$Dollar)") +
  theme(legend.position = "bottom")
## Warning: Ignoring unknown aesthetics: text
data1h19ln
# Interactive copy with the plotly mode bar hidden. `FALSE` is spelled out
# because `F` is an ordinary variable that can be reassigned.
data1h19lngg <- ggplotly(data1h19ln, tooltip = "text") %>%
  config(displayModeBar = FALSE)
data1h19lngg
#### Merge: 2017 median house price, police dispatched incidents, and 2010 population
# Outer-join December 2017 prices with the per-city incident totals on `city`.
df12mrg17 <- merge(data1ht17dec, data2pc17sumdf, by = "city", all = TRUE)
head(df12mrg17)
## city year value date Total
## 1 ASHTON-SANDY SPRING 2017-12 632284 2017-12-01 295
## 2 BARNESVILLE 2017-12 473649 2017-12-01 28
## 3 BEALLSVILLE 2017-12 411979 2017-12-01 39
## 4 BETHESDA 2017-12 912808 2017-12-01 12982
## 5 BOYDS 2017-12 575839 2017-12-01 904
## 6 BRINKLOW 2017-12 671056 2017-12-01 61
# Add the census population, again matching on `city`.
df123mrg17 <- merge(df12mrg17, data3popc, by = "city", all = TRUE)
head(df123mrg17)
## city year value date Total Census.2010
## 1 ASHTON-SANDY SPRING 2017-12 632284 2017-12-01 295 5628
## 2 BARNESVILLE 2017-12 473649 2017-12-01 28 172
## 3 BEALLSVILLE 2017-12 411979 2017-12-01 39 214
## 4 BETHESDA 2017-12 912808 2017-12-01 12982 60858
## 5 BOYDS 2017-12 575839 2017-12-01 904 10460
## 6 BRINKLOW 2017-12 671056 2017-12-01 61 321
# Incidents per 100 residents, rounded to one decimal place.
df123mrg17$rate <- with(
  df123mrg17,
  round(Total / Census.2010 * 100, digits = 1)
)
head(df123mrg17)
## city year value date Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 2017-12 632284 2017-12-01 295 5628 5.2
## 2 BARNESVILLE 2017-12 473649 2017-12-01 28 172 16.3
## 3 BEALLSVILLE 2017-12 411979 2017-12-01 39 214 18.2
## 4 BETHESDA 2017-12 912808 2017-12-01 12982 60858 21.3
## 5 BOYDS 2017-12 575839 2017-12-01 904 10460 8.6
## 6 BRINKLOW 2017-12 671056 2017-12-01 61 321 19.0
# Keep city, house value and incident rate, and key the rows by city name.
df123mrg17c <- df123mrg17[c("city", "value", "rate")]
row.names(df123mrg17c) <- df123mrg17c$city
head(df123mrg17c)
## city value rate
## ASHTON-SANDY SPRING ASHTON-SANDY SPRING 632284 5.2
## BARNESVILLE BARNESVILLE 473649 16.3
## BEALLSVILLE BEALLSVILLE 411979 18.2
## BETHESDA BETHESDA 912808 21.3
## BOYDS BOYDS 575839 8.6
## BRINKLOW BRINKLOW 671056 19.0
# Numeric columns only, as required by cor().
df123result17 <- df123mrg17c[c("value", "rate")]
head(df123result17)
## value rate
## ASHTON-SANDY SPRING 632284 5.2
## BARNESVILLE 473649 16.3
## BEALLSVILLE 411979 18.2
## BETHESDA 912808 21.3
## BOYDS 575839 8.6
## BRINKLOW 671056 19.0
# 2 x 2 correlation matrix between house value and incident rate.
df123cor17 <- cor(df123result17)
df123cor17
## value rate
## value 1.000000 -0.152234
## rate -0.152234 1.000000
# Draw the 2017 correlation matrix three times (circle, pie, number glyphs)
# with otherwise identical settings.
invisible(lapply(c("circle", "pie", "number"), function(glyph) {
  corrplot(
    df123cor17,
    method = glyph,
    title = "2017 Correlation b/w Median House price and Rate of Crime",
    addCoef.col = "black", sig.level = 0.05, insig = "blank",
    mar = c(0, 0, 1, 0),
    type = "upper", order = "hclust",
    col = brewer.pal(n = 8, name = "RdYlBu")
  )
}))
#### Merge: 2018 median house price, police dispatched incidents, and 2010 population
# Full outer join (all = TRUE) of the December 2018 house prices with the
# 2018 incident totals, on their shared column(s).
df12mrg18 <- merge(
  x = data1ht18dec,
  y = data2pc18sumdf,
  all = TRUE
)
#df12mrg18 <- df12mrg18[-c(5)]
head(df12mrg18)
## city year value date Total
## 1 ASHTON-SANDY SPRING 2018-12 639672 2018-12-01 354
## 2 BARNESVILLE 2018-12 490561 2018-12-01 37
## 3 BEALLSVILLE 2018-12 442491 2018-12-01 44
## 4 BETHESDA 2018-12 929906 2018-12-01 16554
## 5 BOYDS 2018-12 582696 2018-12-01 1211
## 6 BRINKLOW 2018-12 671896 2018-12-01 95
# Attach the 2010 census population to the 2018 price/incident table
# (full outer join on the shared column(s)).
df123mrg18 <- merge(
  x = df12mrg18,
  y = data3popc,
  all = TRUE
)
head(df123mrg18)
## city year value date Total Census.2010
## 1 ASHTON-SANDY SPRING 2018-12 639672 2018-12-01 354 5628
## 2 BARNESVILLE 2018-12 490561 2018-12-01 37 172
## 3 BEALLSVILLE 2018-12 442491 2018-12-01 44 214
## 4 BETHESDA 2018-12 929906 2018-12-01 16554 60858
## 5 BOYDS 2018-12 582696 2018-12-01 1211 10460
## 6 BRINKLOW 2018-12 671896 2018-12-01 95 321
# 2018 rate: incidents (Total) as a percentage of the 2010 census population,
# rounded to one decimal place.
df123mrg18[["rate"]] <- with(
  df123mrg18,
  round((Total / Census.2010) * 100, digits = 1)
)
head(df123mrg18)
## city year value date Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 2018-12 639672 2018-12-01 354 5628 6.3
## 2 BARNESVILLE 2018-12 490561 2018-12-01 37 172 21.5
## 3 BEALLSVILLE 2018-12 442491 2018-12-01 44 214 20.6
## 4 BETHESDA 2018-12 929906 2018-12-01 16554 60858 27.2
## 5 BOYDS 2018-12 582696 2018-12-01 1211 10460 11.6
## 6 BRINKLOW 2018-12 671896 2018-12-01 95 321 29.6
# Remove columns 2 and 4-6 (year, date, Total, Census.2010), leaving
# city, value and rate, then use the city name as the row label.
df123mrg18c <- df123mrg18[, -c(2, 4, 5, 6)]
rownames(df123mrg18c) <- df123mrg18c[["city"]]
head(df123mrg18c)
## city value rate
## ASHTON-SANDY SPRING ASHTON-SANDY SPRING 639672 6.3
## BARNESVILLE BARNESVILLE 490561 21.5
## BEALLSVILLE BEALLSVILLE 442491 20.6
## BETHESDA BETHESDA 929906 27.2
## BOYDS BOYDS 582696 11.6
## BRINKLOW BRINKLOW 671896 29.6
# Drop the first column (city); the city names are kept as row names.
df123result18 <- df123mrg18c[, -1]
head(df123result18)
## value rate
## ASHTON-SANDY SPRING 639672 6.3
## BARNESVILLE 490561 21.5
## BEALLSVILLE 442491 20.6
## BETHESDA 929906 27.2
## BOYDS 582696 11.6
## BRINKLOW 671896 29.6
# Correlation matrix between house value and crime rate for 2018;
# the parentheses make the assignment print its result.
(df123cor18 <- cor(df123result18))
## value rate
## value 1.0000000 -0.1421164
## rate -0.1421164 1.0000000
# Draw the 2018 correlation matrix three times, once per glyph style
# (circle, pie, number). All other corrplot() arguments are shared.
invisible(lapply(c("circle", "pie", "number"), function(plot_method) {
  corrplot(
    df123cor18,
    method = plot_method,
    title = "2018 Correlation b/w Median House price and Rate of Crime",
    addCoef.col = "black",
    sig.level = 0.05,
    insig = "blank",
    mar = c(0, 0, 1, 0),
    type = "upper",
    order = "hclust",
    col = brewer.pal(n = 8, name = "RdYlBu")
  )
}))
#### Merge: 2019 median house price, police dispatched incidents, and 2010 population
# Full outer join (all = TRUE) of the December 2019 house prices with the
# 2019 incident totals, on their shared column(s).
df12mrg19 <- merge(
  x = data1ht19dec,
  y = data2pc19sumdf,
  all = TRUE
)
#df12mrg19 <- df12mrg19[-c(5)]
head(df12mrg19)
## city year value date Total
## 1 ASHTON-SANDY SPRING 2019-12 645068 2019-12-01 394
## 2 BARNESVILLE 2019-12 482206 2019-12-01 42
## 3 BEALLSVILLE 2019-12 433208 2019-12-01 54
## 4 BETHESDA 2019-12 944549 2019-12-01 15451
## 5 BOYDS 2019-12 589645 2019-12-01 1160
## 6 BRINKLOW 2019-12 678525 2019-12-01 97
# Attach the 2010 census population to the 2019 price/incident table
# (full outer join on the shared column(s)).
df123mrg19 <- merge(
  x = df12mrg19,
  y = data3popc,
  all = TRUE
)
head(df123mrg19)
## city year value date Total Census.2010
## 1 ASHTON-SANDY SPRING 2019-12 645068 2019-12-01 394 5628
## 2 BARNESVILLE 2019-12 482206 2019-12-01 42 172
## 3 BEALLSVILLE 2019-12 433208 2019-12-01 54 214
## 4 BETHESDA 2019-12 944549 2019-12-01 15451 60858
## 5 BOYDS 2019-12 589645 2019-12-01 1160 10460
## 6 BRINKLOW 2019-12 678525 2019-12-01 97 321
# 2019 rate: incidents (Total) as a percentage of the 2010 census population,
# rounded to one decimal place.
df123mrg19[["rate"]] <- with(
  df123mrg19,
  round((Total / Census.2010) * 100, digits = 1)
)
head(df123mrg19)
## city year value date Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 2019-12 645068 2019-12-01 394 5628 7.0
## 2 BARNESVILLE 2019-12 482206 2019-12-01 42 172 24.4
## 3 BEALLSVILLE 2019-12 433208 2019-12-01 54 214 25.2
## 4 BETHESDA 2019-12 944549 2019-12-01 15451 60858 25.4
## 5 BOYDS 2019-12 589645 2019-12-01 1160 10460 11.1
## 6 BRINKLOW 2019-12 678525 2019-12-01 97 321 30.2
# Remove columns 2 and 4-6 (year, date, Total, Census.2010), leaving
# city, value and rate, then use the city name as the row label.
df123mrg19c <- df123mrg19[, -c(2, 4, 5, 6)]
rownames(df123mrg19c) <- df123mrg19c[["city"]]
head(df123mrg19c)
## city value rate
## ASHTON-SANDY SPRING ASHTON-SANDY SPRING 645068 7.0
## BARNESVILLE BARNESVILLE 482206 24.4
## BEALLSVILLE BEALLSVILLE 433208 25.2
## BETHESDA BETHESDA 944549 25.4
## BOYDS BOYDS 589645 11.1
## BRINKLOW BRINKLOW 678525 30.2
# Drop the first column (city); the city names are kept as row names.
df123result19 <- df123mrg19c[, -1]
head(df123result19)
## value rate
## ASHTON-SANDY SPRING 645068 7.0
## BARNESVILLE 482206 24.4
## BEALLSVILLE 433208 25.2
## BETHESDA 944549 25.4
## BOYDS 589645 11.1
## BRINKLOW 678525 30.2
# Correlation matrix between house value and crime rate for 2019;
# the parentheses make the assignment print its result.
(df123cor19 <- cor(df123result19))
## value rate
## value 1.0000000 -0.1328734
## rate -0.1328734 1.0000000
# Draw the 2019 correlation matrix three times, once per glyph style
# (circle, pie, number). All other corrplot() arguments are shared, so the
# title is spelled identically on every plot (the circle plot previously
# read "House pricee").
invisible(lapply(c("circle", "pie", "number"), function(plot_method) {
  corrplot(
    df123cor19,
    method = plot_method,
    title = "2019 Correlation b/w Median House price and Rate of Crime",
    addCoef.col = "black",
    sig.level = 0.05,
    insig = "blank",
    mar = c(0, 0, 1, 0),
    type = "upper",
    order = "hclust",
    col = brewer.pal(n = 8, name = "RdYlBu")
  )
}))
# Stack the 2017, 2018 and 2019 tables (same columns) into one long table.
all3yr <- rbind(df123mrg17, df123mrg18, df123mrg19)
head(all3yr)
## city year value date Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 2017-12 632284 2017-12-01 295 5628 5.2
## 2 BARNESVILLE 2017-12 473649 2017-12-01 28 172 16.3
## 3 BEALLSVILLE 2017-12 411979 2017-12-01 39 214 18.2
## 4 BETHESDA 2017-12 912808 2017-12-01 12982 60858 21.3
## 5 BOYDS 2017-12 575839 2017-12-01 904 10460 8.6
## 6 BRINKLOW 2017-12 671056 2017-12-01 61 321 19.0
# Crime rate per city, one facet per yearly snapshot; point size tracks the
# rate and point shape the year.
# NOTE(review): `labels` is not defined in this section; confirm it is a
# vector created earlier, otherwise it resolves to base::labels().
all3yrgg <- ggplot(
  data = all3yr,
  mapping = aes(x = city, y = rate, group = city, shape = year)
) +
  geom_point(mapping = aes(color = city, size = rate)) +
  # geom_text(aes(label = rate), vjust = -0.3, size = 4) +
  facet_wrap(~ year) +
  scale_x_discrete(name = "City", breaks = labels, labels = labels) +
  scale_y_continuous(name = "Crime rate(%=crime/population)") +
  labs(title = "Yearly Crime Rate") +
  theme(
    axis.text.x = element_text(size = 5),
    axis.text.x.bottom = element_text(angle = 70, hjust = 0.9)
  )
all3yrgg
# Balloon plot of the stacked three-year table, balloons filled by `rate`
# using the viridis "C" palette.
all3yrballoon <- ggballoonplot(all3yr, fill = "rate") +
  scale_fill_viridis_c(option = "C") +
  labs(x = "City", y = "Year", title = "Yearly Crime Rate")
all3yrballoon
# 2017: keep only the rows whose year label is "2017-12".
all3yr17 <- all3yr %>%
  filter(year == "2017-12")
head(all3yr17)
## city year value date Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 2017-12 632284 2017-12-01 295 5628 5.2
## 2 BARNESVILLE 2017-12 473649 2017-12-01 28 172 16.3
## 3 BEALLSVILLE 2017-12 411979 2017-12-01 39 214 18.2
## 4 BETHESDA 2017-12 912808 2017-12-01 12982 60858 21.3
## 5 BOYDS 2017-12 575839 2017-12-01 904 10460 8.6
## 6 BRINKLOW 2017-12 671056 2017-12-01 61 321 19.0
# 2017 crime rate per city: point size tracks the rate and each point is
# annotated with its value.
# NOTE(review): `labels` is not defined in this section; confirm it is a
# vector created earlier, otherwise it resolves to base::labels().
all3yr17gg <- ggplot(
  data = all3yr17,
  mapping = aes(x = city, y = rate, group = city)
) +
  geom_point(mapping = aes(color = city, size = rate)) +
  geom_text(mapping = aes(label = rate), vjust = -0.3, size = 4) +
  scale_x_discrete(name = "City", breaks = labels, labels = labels) +
  scale_y_continuous(name = "Crime rate(%=crime/population)") +
  labs(title = "2017 Crime Rate by city") +
  theme(
    axis.text.x = element_text(size = 5, angle = 70, hjust = 0.9),
    axis.text.x.bottom = element_text(angle = 70, hjust = 0.9),
    legend.position = "right"
  )
all3yr17gg
# 2018: keep only the rows whose year label is "2018-12".
all3yr18 <- all3yr %>%
  filter(year == "2018-12")
head(all3yr18)
## city year value date Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 2018-12 639672 2018-12-01 354 5628 6.3
## 2 BARNESVILLE 2018-12 490561 2018-12-01 37 172 21.5
## 3 BEALLSVILLE 2018-12 442491 2018-12-01 44 214 20.6
## 4 BETHESDA 2018-12 929906 2018-12-01 16554 60858 27.2
## 5 BOYDS 2018-12 582696 2018-12-01 1211 10460 11.6
## 6 BRINKLOW 2018-12 671896 2018-12-01 95 321 29.6
# 2018 crime rate per city: point size tracks the rate and each point is
# annotated with its value.
# NOTE(review): `labels` is not defined in this section; confirm it is a
# vector created earlier, otherwise it resolves to base::labels().
all3yr18gg <- ggplot(
  data = all3yr18,
  mapping = aes(x = city, y = rate, group = city)
) +
  geom_point(mapping = aes(color = city, size = rate)) +
  geom_text(mapping = aes(label = rate), vjust = -0.3, size = 4) +
  scale_x_discrete(name = "City", breaks = labels, labels = labels) +
  scale_y_continuous(name = "Crime rate(%=crime/population)") +
  labs(title = "2018 Crime Rate by city") +
  theme(
    axis.text.x = element_text(size = 5, angle = 70, hjust = 0.9),
    axis.text.x.bottom = element_text(angle = 70, hjust = 0.9),
    legend.position = "right"
  )
all3yr18gg
# 2019: keep only the rows whose year label is "2019-12".
all3yr19 <- all3yr %>%
  filter(year == "2019-12")
head(all3yr19)
## city year value date Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 2019-12 645068 2019-12-01 394 5628 7.0
## 2 BARNESVILLE 2019-12 482206 2019-12-01 42 172 24.4
## 3 BEALLSVILLE 2019-12 433208 2019-12-01 54 214 25.2
## 4 BETHESDA 2019-12 944549 2019-12-01 15451 60858 25.4
## 5 BOYDS 2019-12 589645 2019-12-01 1160 10460 11.1
## 6 BRINKLOW 2019-12 678525 2019-12-01 97 321 30.2
# 2019 crime rate per city: point size tracks the rate and each point is
# annotated with its value.
# NOTE(review): `labels` is not defined in this section; confirm it is a
# vector created earlier, otherwise it resolves to base::labels().
all3yr19gg <- ggplot(
  data = all3yr19,
  mapping = aes(x = city, y = rate, group = city)
) +
  geom_point(mapping = aes(color = city, size = rate)) +
  geom_text(mapping = aes(label = rate), vjust = -0.3, size = 4) +
  scale_x_discrete(name = "City", breaks = labels, labels = labels) +
  scale_y_continuous(name = "Crime rate(%=crime/population)") +
  labs(title = "2019 Crime Rate by city") +
  theme(
    axis.text.x = element_text(size = 5, angle = 70, hjust = 0.9),
    axis.text.x.bottom = element_text(angle = 70, hjust = 0.9),
    legend.position = "right"
  )
all3yr19gg
# Place multiple plots together (currently disabled):
#gridExtra::grid.arrange(data1h17ln, data1h18ln, data1h19ln, nrow=3)

# Number of rows in data2pc per city; the count is stored in a column
# named Priority.
data2pcgr <- summarise(
  group_by(data2pc, city),
  Priority = n()
)
head(data2pcgr)
## # A tibble: 6 x 2
## city Priority
## <chr> <int>
## 1 ASHTON-SANDY SPRING 1130
## 2 BARNESVILLE 113
## 3 BEALLSVILLE 147
## 4 BETHESDA 47781
## 5 BOYDS 3456
## 6 BRINKLOW 269
# Bar chart of the per-city counts in data2pcgr, each bar labelled with its
# value. The `text` aesthetic is not a ggplot2 aesthetic (ggplot2 warns about
# it); it is carried along for the plotly tooltip.
data2pcbar <- ggplot(
  data = data2pcgr,
  mapping = aes(x = city, y = Priority)
) +
  geom_bar(
    mapping = aes(text = paste("City: ", city)),
    data = data2pcgr,
    stat = "identity",
    fill = "#0073C2FF",
    size = 1
  ) +
  geom_text(mapping = aes(label = Priority), vjust = -0.1) +
  labs(
    title = "Total Severity of Police Dispatched Incidents from 2017 to 2019",
    subtitle = "per City in Montgomery County, MD",
    x = "City",
    y = "Severity"
  ) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.9))
## Warning: Ignoring unknown aesthetics: text
data2pcbar
# Interactive version: the hover shows only the custom `text` aesthetic and
# the plotly mode bar is hidden. FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.
data2pcbargg <- ggplotly(data2pcbar, tooltip = "text") %>%
  config(displayModeBar = FALSE)
data2pcbargg
# Dodged bar chart of dispatch frequency per city, one bar per severity level.
# The `text` aesthetic is not a ggplot2 aesthetic (ggplot2 warns about it);
# it holds the HTML hover text for plotly. Each field ends with a complete
# "<br>" tag so the tooltip breaks lines correctly (the Severity line
# previously ended with a malformed "<br").
data2pcall <- ggplot(data2pcdf, aes(x = city, y = Freq, fill = Severity)) +
  geom_bar(
    aes(
      fill = Severity,
      text = paste(
        paste("City: ", city, "<br>"),
        paste("Severity: ", Severity, "<br>"),
        paste("Frequency: ", Freq)
      )
    ),
    size = 1,
    data = data2pcdf,
    stat = "identity",
    position = position_dodge(0.9)
  ) +
  theme(axis.text.x.bottom = element_text(angle = 70, hjust = 0.9)) +
  ggtitle(
    "Total Severity of Police Dispatched Incidents from 2017 to 2019",
    subtitle = "per city in Montgomery County, MD"
  ) +
  ylab("Count") +
  xlab("city")
## Warning: Ignoring unknown aesthetics: text
data2pcall
# Interactive version. The argument must be spelled `tooltip`: the previous
# `tootip` was silently absorbed by `...`, so ggplotly fell back to its
# default tooltip instead of showing only the custom `text` aesthetic.
data2pcallgg <- ggplotly(data2pcall, tooltip = "text") %>%
  config(displayModeBar = FALSE)
data2pcallgg
# Grouped plotly bar chart per city with one trace per column X0..X4 of
# data2pcdf2, named "0".."4".
pcplotly <- plot_ly(
  data2pcdf2,
  x = ~city, y = ~X0, type = "bar", name = "0"
) %>%
  add_trace(y = ~X1, name = "1") %>%
  add_trace(y = ~X2, name = "2") %>%
  add_trace(y = ~X3, name = "3") %>%
  add_trace(y = ~X4, name = "4") %>%
  layout(
    yaxis = list(title = "Freq"),
    barmode = "group",
    title = "Total Severity of Police Dispatched Incidents from 2017 to 2019 \nper city in Montgomery County, MD"
  ) %>%
  # Hand-placed legend title, positioned in paper coordinates just right of
  # the plotting area (label text previously misspelled "Severty").
  add_annotations(
    text = "Severity",
    xref = "paper",
    yref = "paper",
    x = 1.02, xanchor = "left",
    y = 0.8, yanchor = "bottom",
    legendtitle = TRUE, showarrow = FALSE
  ) %>%
  # Move the legend so its top sits right under the annotation.
  layout(legend = list(y = 0.8, yanchor = "top")) %>%
  config(displayModeBar = FALSE)
pcplotly
# Idea: top and bottom cities by severity rate.
## 2017
# Left join (all.x = TRUE): every city in the 2017 incident totals is kept and
# gets its 2010 census population where one is available.
data23mg17 <- merge(
  x = data2pc17sumdf,
  y = data3popc,
  by.x = "city",
  all.x = TRUE
)
head(data23mg17)
## city Total Census.2010
## 1 ASHTON-SANDY SPRING 295 5628
## 2 BARNESVILLE 28 172
## 3 BEALLSVILLE 39 214
## 4 BETHESDA 12982 60858
## 5 BOYDS 904 10460
## 6 BRINKLOW 61 321
## Rate: Total / Census.2010, expressed as a percentage and rounded to one
## decimal place.
data23mg17[["rate"]] <- with(
  data23mg17,
  round((Total / Census.2010) * 100, digits = 1)
)
head(data23mg17)
## city Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 295 5628 5.2
## 2 BARNESVILLE 28 172 16.3
## 3 BEALLSVILLE 39 214 18.2
## 4 BETHESDA 12982 60858 21.3
## 5 BOYDS 904 10460 8.6
## 6 BRINKLOW 61 321 19.0
# The five rows with the highest 2017 rate (note: five, despite the "Top6"
# name); the second column (Total) is then dropped.
data2317Top6 <- slice(arrange(data23mg17, desc(rate)), 1:5)
data2317Top6 <- data2317Top6[, -2]
head(data2317Top6)
## city Census.2010 rate
## 1 KENSINGTON 2213 136.8
## 2 DERWOOD 2381 125.1
## 3 SILVER SPRING 71452 79.5
## 4 ROCKVILLE 61209 43.7
## 5 GAITHERSBURG 59933 40.9
# Wide-format alluvial plot of the highest-rate cities, handed to parcats()
# with the marginal histograms, hover info and mode bar switched off.
# Uses `<-` for assignment and FALSE instead of the reassignable `F`.
p1 <- alluvial_wide(as.data.frame(data2317Top6), max_variables = 4, bins = 5)
parcats(p1, marginal_histograms = FALSE, hoverinfo = "none") %>%
  config(displayModeBar = FALSE)
# The five rows with the lowest 2017 rate (note: five, despite the "Low6"
# name); the second column (Total) is then dropped.
data2317Low6 <- slice(arrange(data23mg17, rate), 1:5)
data2317Low6 <- data2317Low6[, -2]
head(data2317Low6)
## city Census.2010 rate
## 1 ASHTON-SANDY SPRING 5628 5.2
## 2 CABIN JOHN 2280 7.4
## 3 SPENCERVILLE 1594 7.6
## 4 BOYDS 10460 8.6
## 5 WASHINGTON GROVE 555 9.0
# Wide-format alluvial plot of the lowest-rate cities, handed to parcats()
# with the marginal histograms, hover info and mode bar switched off.
# Uses `<-` for assignment and FALSE instead of the reassignable `F`.
p2 <- alluvial_wide(as.data.frame(data2317Low6), max_variables = 4, bins = 5)
parcats(p2, marginal_histograms = FALSE, hoverinfo = "none") %>%
  config(displayModeBar = FALSE)
head(data2pcdf)
## city Severity Freq
## 1 ASHTON-SANDY SPRING 0 104
## 2 BARNESVILLE 0 6
## 3 BEALLSVILLE 0 10
## 4 BETHESDA 0 3539
## 5 BOYDS 0 518
## 6 BRINKLOW 0 30
# Within each Severity level, the five cities with the highest Freq.
# Rows are sorted before grouping; the result stays grouped by Severity.
data2pcTop6 <- data2pcdf %>%
  arrange(desc(Freq)) %>%
  group_by(Severity) %>%
  slice(seq_len(5))
head(data2pcTop6)
## # A tibble: 6 x 3
## # Groups: Severity [2]
## city Severity Freq
## <fct> <fct> <int>
## 1 SILVER SPRING 0 22187
## 2 GAITHERSBURG 0 9198
## 3 ROCKVILLE 0 8442
## 4 GERMANTOWN 0 6747
## 5 BETHESDA 0 3539
## 6 SILVER SPRING 1 83731
# Within each Severity level, the five cities with the lowest Freq.
# Rows are sorted before grouping; the result stays grouped by Severity.
data2pcLow6 <- data2pcdf %>%
  arrange(Freq) %>%
  group_by(Severity) %>%
  slice(seq_len(5))
head(data2pcLow6)
## # A tibble: 6 x 3
## # Groups: Severity [2]
## city Severity Freq
## <fct> <fct> <int>
## 1 BARNESVILLE 0 6
## 2 BEALLSVILLE 0 10
## 3 WASHINGTON GROVE 0 15
## 4 GARRETT PARK 0 19
## 5 GLEN ECHO 0 21
## 6 BEALLSVILLE 1 47
# Check that the top-five table is in alluvia (wide) form, treating columns
# 1-3 as axes; messages are suppressed.
is_alluvia_form(as.data.frame(data2pcTop6), axes = 1:3, silent = TRUE)
## [1] TRUE
# Alluvial diagram for the top-five cities per severity level: axis1 is the
# city, axis2 the severity, and flow height is Freq.
# The stray `alluvium = city` that sat outside aes() was never evaluated by
# ggplot() and has been removed.
data2pcT6alluv <- ggplot(
  as.data.frame(data2pcTop6),
  aes(y = Freq, axis1 = city, axis2 = Severity)
) +
  geom_alluvium(
    aes(fill = city, colour = city),
    width = 1/4, alpha = 2/3, decreasing = NA
  ) +
  geom_stratum(width = 1/12, fill = "black", color = "grey") +
  geom_label(stat = "stratum", infer.label = TRUE) +
  # Axis labels must follow axis order (axis1 = city, axis2 = Severity);
  # they were previously listed the other way round, mislabelling both axes.
  scale_x_discrete(limits = c("City", "Severity"), expand = c(.05, .05)) +
  # Print y-axis counts in plain (non-scientific) notation.
  scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
  scale_fill_brewer(type = "qual", palette = "Set1") +
  ggtitle("Top 5 Cities in Severity")
data2pcT6alluv
# Check that the bottom-five table is in alluvia (wide) form, treating
# columns 1-3 as axes; messages are suppressed.
is_alluvia_form(as.data.frame(data2pcLow6), axes = 1:3, silent = TRUE)
## [1] TRUE
# Alluvial diagram for the bottom-five cities per severity level: axis1 is
# the city, axis2 the severity, and flow height is Freq.
# The stray `alluvium = city` that sat outside aes() was never evaluated by
# ggplot() and has been removed.
data2pcL6alluv <- ggplot(
  as.data.frame(data2pcLow6),
  aes(y = Freq, axis1 = city, axis2 = Severity)
) +
  geom_alluvium(
    aes(fill = city, colour = city),
    width = 1/4, alpha = 2/3, decreasing = NA
  ) +
  geom_stratum(width = 1/12, fill = "black", color = "grey") +
  geom_label(stat = "stratum", infer.label = TRUE) +
  # Axis labels follow axis order: axis1 = city, axis2 = Severity.
  scale_x_discrete(limits = c("City", "Severity"), expand = c(.05, .05)) +
  # Print y-axis counts in plain (non-scientific) notation.
  scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
  scale_fill_brewer(type = "qual", palette = "Set1") +
  # data2pcLow6 holds five cities per severity level (slice(1:5)), so the
  # title says 5, matching the "Top 5" chart.
  ggtitle("Low 5 Cities in Severity")
data2pcL6alluv
# Wide-format alluvial plot of the top-five-per-severity table, handed to
# parcats() without marginal histograms.
p <- alluvial_wide(
  as.data.frame(data2pcTop6),
  max_variables = 3,
  bins = 5
)
parcats(p, marginal_histograms = FALSE)
## Rate of crime by population
## 2018
# Left join (all.x = TRUE): every city in the 2018 incident totals is kept and
# gets its 2010 census population where one is available.
data23m18 <- merge(
  x = data2pc18sumdf,
  y = data3popc,
  by.x = "city",
  all.x = TRUE
)
head(data23m18)
## city Total Census.2010
## 1 ASHTON-SANDY SPRING 354 5628
## 2 BARNESVILLE 37 172
## 3 BEALLSVILLE 44 214
## 4 BETHESDA 16554 60858
## 5 BOYDS 1211 10460
## 6 BRINKLOW 95 321
# 2018 rate: Total as a percentage of the 2010 census population, rounded to
# one decimal place.
data23m18[["rate"]] <- with(
  data23m18,
  round((Total / Census.2010) * 100, digits = 1)
)
head(data23m18)
## city Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 354 5628 6.3
## 2 BARNESVILLE 37 172 21.5
## 3 BEALLSVILLE 44 214 20.6
## 4 BETHESDA 16554 60858 27.2
## 5 BOYDS 1211 10460 11.6
## 6 BRINKLOW 95 321 29.6
## 2019
# Left join (all.x = TRUE): every city in the 2019 incident totals is kept and
# gets its 2010 census population where one is available.
data23m19 <- merge(
  x = data2pc19sumdf,
  y = data3popc,
  by.x = "city",
  all.x = TRUE
)
head(data23m19)
## city Total Census.2010
## 1 ASHTON-SANDY SPRING 394 5628
## 2 BARNESVILLE 42 172
## 3 BEALLSVILLE 54 214
## 4 BETHESDA 15451 60858
## 5 BOYDS 1160 10460
## 6 BRINKLOW 97 321
# 2019 rate: Total as a percentage of the 2010 census population, rounded to
# one decimal place.
data23m19[["rate"]] <- with(
  data23m19,
  round((Total / Census.2010) * 100, digits = 1)
)
head(data23m19)
## city Total Census.2010 rate
## 1 ASHTON-SANDY SPRING 394 5628 7.0
## 2 BARNESVILLE 42 172 24.4
## 3 BEALLSVILLE 54 214 25.2
## 4 BETHESDA 15451 60858 25.4
## 5 BOYDS 1160 10460 11.1
## 6 BRINKLOW 97 321 30.2
# Keep only the dispatch records whose Priority equals "0".
data2pmsv0 <- data2pc %>%
  filter(Priority == "0")
head(data2pmsv0)
## End.Time Priority Close.Type city
## 1 9/14/2019 14:29 0 TRAFFIC/TRANSPORTATION INCIDENT GERMANTOWN
## 2 4/2/2017 17:15 0 SUICIDAL PERSON/ATTEMPTED SUICIDE SILVER SPRING
## 3 3/21/2019 5:59 0 URGENT ASSIST ROCKVILLE
## 4 10/19/2018 19:48 0 TRAFFIC/TRANSPORTATION INCIDENT SILVER SPRING
## 5 4/3/2017 4:18 0 SUICIDAL PERSON/ATTEMPTED SUICIDE SILVER SPRING
## 6 4/3/2017 17:02 0 MENTAL DISORDER - VIA FRS DAMASCUS
## Police.District.Number CallTime.CallRoute Calltime.Dispatch Calltime.Arrive
## 1 5D 99 118 347
## 2 2D 99 156 588
## 3 4D 0 44 461
## 4 3D 103 153 335
## 5 3D 131 191 2227
## 6 5D 203 312 639
## Calltime.Cleared CallRoute.Dispatch Dispatch.Arrive Arrive.Cleared
## 1 4296 18 228 3949
## 2 3908 56 431 3320
## 3 35782 44 416 35321
## 4 3351 50 181 3016
## 5 4588 60 2035 2361
## 6 1159 109 327 520
## datetime time date
## 1 2019-09-14 14:29:00 14:29:00 09/14/2019
## 2 2017-04-02 17:15:00 17:15:00 04/02/2017
## 3 2019-03-21 05:59:00 05:59:00 03/21/2019
## 4 2018-10-19 19:48:00 19:48:00 10/19/2018
## 5 2017-04-03 04:18:00 04:18:00 04/03/2017
## 6 2017-04-03 17:02:00 17:02:00 04/03/2017
# Number of priority-0 records per city; the count is stored in a column
# named Priority.
data2pm0hist <- summarise(
  group_by(data2pmsv0, city),
  Priority = n()
)
head(data2pm0hist)
## # A tibble: 6 x 2
## city Priority
## <chr> <int>
## 1 ASHTON-SANDY SPRING 104
## 2 BARNESVILLE 6
## 3 BEALLSVILLE 10
## 4 BETHESDA 3539
## 5 BOYDS 518
## 6 BRINKLOW 30
# Red bar chart of the priority-0 counts per city, each bar labelled with its
# value. Note that this assignment replaces the summary table previously held
# in data2pm0hist with the plot object, and the plot is not printed here.
data2pm0hist <- ggplot(
  data = data2pm0hist,
  mapping = aes(x = city, y = Priority)
) +
  geom_bar(stat = "identity", fill = "red") +
  geom_text(mapping = aes(label = Priority), vjust = -0.3) +
  labs(
    title = "Most Severity Rate (0) of Police Dispatched Incidents",
    subtitle = "per city in Montgomery County, MD",
    x = "city",
    y = "Priority(Severity)"
  ) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.9))
# Orange bar chart of the 2010 census population per city, each bar labelled
# with its value.
# The bars carry a `text` aesthetic because the interactive version below
# requests tooltip = "text"; without it there is no `text` aesthetic for the
# tooltip to display. `text` is not a ggplot2 aesthetic, so ggplot2 emits an
# "Ignoring unknown aesthetics: text" warning, as for the other charts built
# this way.
data3popbar <- ggplot(data3popc, aes(x = city, y = Census.2010)) +
  geom_bar(
    aes(text = paste0("City: ", city, "<br>",
                      "Population 2010: ", Census.2010)),
    fill = "orange",
    stat = "identity"
  ) +
  geom_text(aes(label = Census.2010), vjust = -0.3, size = 4) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.9)) +
  theme(plot.title = element_text(hjust = 0, size = 10)) +
  ggtitle(
    "Population of Cities in Montgomery County, MD",
    subtitle = "(Source: Census Bureau 2010)"
  ) +
  ylab("Count") +
  xlab("city")
data3popbar
# Interactive version with the mode bar hidden; FALSE is spelled out because
# `F` is an ordinary variable that can be reassigned.
data3popbargg <- ggplotly(data3popbar, tooltip = "text") %>%
  config(displayModeBar = FALSE)
data3popbargg
# Lollipop-style chart of the 2010 census population per city: a gray stem
# from 0 to the population value, a colored point on top, and a numeric label.
# The `text` aesthetic on the points is not a ggplot2 aesthetic (ggplot2 warns
# about it); it is carried along for the plotly tooltip.
data3popln <- ggplot(
  data = data3popc,
  mapping = aes(x = city, y = Census.2010)
) +
  geom_linerange(
    mapping = aes(x = city, ymin = 0, ymax = Census.2010),
    color = "gray",
    size = 1.5
  ) +
  geom_point(
    mapping = aes(
      color = city,
      text = paste(
        paste("City: ", city, "<br>"),
        paste("Population 2010: ", Census.2010)
      )
    ),
    size = 2
  ) +
  geom_text(mapping = aes(label = Census.2010), vjust = -0.6, size = 3) +
  labs(
    title = "Population of Cities in Montgomery County, MD",
    subtitle = "(Source: Census Bureau 2010)",
    x = "city",
    y = "Count"
  ) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.9))
## Warning: Ignoring unknown aesthetics: text
data3popln
# Interactive version: the hover shows only the custom `text` aesthetic and
# the plotly mode bar is hidden. FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.
data3poplngg <- ggplotly(data3popln, tooltip = "text") %>%
  config(displayModeBar = FALSE)
data3poplngg