Tidying and Transforming Data

Load libraries

library(tidyr)
## Warning: package 'tidyr' was built under R version 3.4.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

#1 NYC Foreclosure Data - Sedo Oh

Source: Table 1 (in the pdf file) http://furmancenter.org/files/sotc/SOC_2016_Full.pdf

Analysis: draw tread line of the foreclosure

Load Data - NYC Foreclosure

NYCForeclosure <- tbl_df(read.csv("foreclosure.csv", stringsAsFactors = FALSE, check.names = FALSE))
#Boro Summary
print(NYCForeclosure)
## # A tibble: 6 x 7
##        Boro `2011` `2012` `2013` `2014` `2015` `2016`
##       <chr>  <int>  <int>  <int>  <int>  <int>  <int>
## 1     Bronx   8770   8956   8456   5459   4324   4904
## 2  Brooklyn  21351  21768  19745  12530   9185  10877
## 3 Manhattan   1142   1154   1213    733    608    901
## 4    Queens  29307  28721  25712  16007  12563  14796
## 5  S.Island  10319  10274   9163   6222   4983   5646
## 6       NYC  70889  70873  64289  40951  31663  37124

Tidy the Foreclosure data

#get boro

boroType <- NYCForeclosure$Boro

#tidy data
NYCForeclosure.ByYear <- NYCForeclosure %>%
        gather(Yr, ForeclosureCount, 2:7)

print(NYCForeclosure.ByYear)
## # A tibble: 36 x 3
##         Boro    Yr ForeclosureCount
##        <chr> <chr>            <int>
##  1     Bronx  2011             8770
##  2  Brooklyn  2011            21351
##  3 Manhattan  2011             1142
##  4    Queens  2011            29307
##  5  S.Island  2011            10319
##  6       NYC  2011            70889
##  7     Bronx  2012             8956
##  8  Brooklyn  2012            21768
##  9 Manhattan  2012             1154
## 10    Queens  2012            28721
## # ... with 26 more rows

Modifed from http://www.statmethods.net/graphs/line.html

#get the range for x,y
xrange = range(NYCForeclosure.ByYear$Yr)
yrange = range(NYCForeclosure.ByYear$ForeclosureCount)

#setup plot

plot(xrange, yrange, type="n", xlab="Year",
    ylab="Foreclosure Count" ) 
colors <- rainbow(length(boroType)) 
linetype <- c(1:length(boroType)) 
plotchar <- seq(18,18+length(boroType),1)

# add lines 
for (i in 1:length(boroType)) { 
  boroforeclosure <- subset(NYCForeclosure.ByYear, NYCForeclosure.ByYear$Boro==boroType[i]) 
  lines(boroforeclosure$Yr, boroforeclosure$ForeclosureCount, type="b", lwd=1.5,
    lty=linetype[i], col=colors[i], pch=plotchar[i]) 
} 

# add a title and subtitle 
title("NYC Foreclosure", "")

# add a legend 
legend( "topright", boroType, cex=0.8, col=colors,
    pch=plotchar, lty=linetype, title="Boro")

As we can see above, NYC foreclosure rate gradually decreased but it slightly picked up again from 2015 to 2016.

#2 Oil supply and demand, price - Silverio Vasquez - EIA Short-term Energy Outlook

Tidy data can be used to downstream analysis. I saved the data in the csv format and add grouping for easy calcuation before tidying

EIA_OilSupply <- tbl_df(read.csv("EIA_OilSupply.csv", stringsAsFactors = FALSE, check.names = FALSE))
EIA_OilConsumption <- tbl_df(read.csv("EIA_OilConsumption.csv", stringsAsFactors = FALSE, check.names = FALSE))
EIA_OilPrice <- tbl_df(read.csv("EIA_OilPrice.csv", stringsAsFactors = FALSE, check.names = FALSE))

#all oil data
print(EIA_OilSupply)
## # A tibble: 9 x 75
##                 CODE    GROUP                   DESC `1/31/2013`
##                <chr>    <chr>                  <chr>       <dbl>
## 1            papr_us     OECD       U.S. (50 States)       11.59
## 2            papr_ca     OECD                 Canada        4.12
## 3            papr_mx     OECD                 Mexico        2.96
## 4     papr_otheroecd     OECD             Other OECD        4.40
## 5          copr_opec Non-OECD OPEC Crude Oil Portion       30.40
## 6            opec_nc Non-OECD OPEC Other Liquids (b)        6.43
## 7           papr_fsu Non-OECD    Former Soviet Union       13.74
## 8            papr_ch Non-OECD                  China        4.89
## 9 papr_other_nonoecd Non-OECD         Other Non-OECD       11.66
## # ... with 71 more variables: `2/28/2013` <dbl>, `3/31/2013` <dbl>,
## #   `4/30/2013` <dbl>, `5/31/2013` <dbl>, `6/30/2013` <dbl>,
## #   `7/31/2013` <dbl>, `8/31/2013` <dbl>, `9/30/2013` <dbl>,
## #   `10/31/2013` <dbl>, `11/30/2013` <dbl>, `12/31/2013` <dbl>,
## #   `1/31/2014` <dbl>, `2/28/2014` <dbl>, `3/31/2014` <dbl>,
## #   `4/30/2014` <dbl>, `5/31/2014` <dbl>, `6/30/2014` <dbl>,
## #   `7/31/2014` <dbl>, `8/31/2014` <dbl>, `9/30/2014` <dbl>,
## #   `10/31/2014` <dbl>, `11/30/2014` <dbl>, `12/31/2014` <dbl>,
## #   `1/31/2015` <dbl>, `2/28/2015` <dbl>, `3/31/2015` <dbl>,
## #   `4/30/2015` <dbl>, `5/31/2015` <dbl>, `6/30/2015` <dbl>,
## #   `7/31/2015` <dbl>, `8/31/2015` <dbl>, `9/30/2015` <dbl>,
## #   `10/31/2015` <dbl>, `11/30/2015` <dbl>, `12/31/2015` <dbl>,
## #   `1/31/2016` <dbl>, `2/29/2016` <dbl>, `3/31/2016` <dbl>,
## #   `4/30/2016` <dbl>, `5/31/2016` <dbl>, `6/30/2016` <dbl>,
## #   `7/31/2016` <dbl>, `8/31/2016` <dbl>, `9/30/2016` <dbl>,
## #   `10/31/2016` <dbl>, `11/30/2016` <dbl>, `12/31/2016` <dbl>,
## #   `1/31/2017` <dbl>, `2/28/2017` <dbl>, `3/31/2017` <dbl>,
## #   `4/30/2017` <dbl>, `5/31/2017` <dbl>, `6/30/2017` <dbl>,
## #   `7/31/2017` <dbl>, `8/31/2017` <dbl>, `9/30/2017` <dbl>,
## #   `10/31/2017` <dbl>, `11/30/2017` <dbl>, `12/31/2017` <dbl>,
## #   `1/31/2018` <dbl>, `2/28/2018` <dbl>, `3/31/2018` <dbl>,
## #   `4/30/2018` <dbl>, `5/31/2018` <dbl>, `6/30/2018` <dbl>,
## #   `7/31/2018` <dbl>, `8/31/2018` <dbl>, `9/30/2018` <dbl>,
## #   `10/31/2018` <dbl>, `11/30/2018` <dbl>, `12/31/2018` <dbl>
print(EIA_OilConsumption)
## # A tibble: 11 x 75
##                   CODE    GROUP             DESC `1/31/2013` `2/28/2013`
##                  <chr>    <chr>            <chr>       <dbl>       <dbl>
##  1             patc_us     OECD U.S. (50 States)       18.75       18.64
##  2            patc_ust     OECD U.S. Territories        0.28        0.28
##  3             patc_ca     OECD           Canada        2.52        2.48
##  4    patc_oecd_europe     OECD           Europe       12.82       13.41
##  5             patc_ja     OECD            Japan        5.08        5.19
##  6     patc_other_oecd     OECD       Other OECD        6.37        6.48
##  7            patc_fsu Non-OECD          Eurasia        4.16        4.42
##  8 patc_nonoecd_europe Non-OECD           Europe        0.62        0.64
##  9             patc_ch Non-OECD            China       10.88       11.67
## 10     patc_other_asia Non-OECD       Other Asia       11.48       11.63
## 11  patc_other_nonoecd Non-OECD   Other Non-OECD       17.84       17.67
## # ... with 70 more variables: `3/31/2013` <dbl>, `4/30/2013` <dbl>,
## #   `5/31/2013` <dbl>, `6/30/2013` <dbl>, `7/31/2013` <dbl>,
## #   `8/31/2013` <dbl>, `9/30/2013` <dbl>, `10/31/2013` <dbl>,
## #   `11/30/2013` <dbl>, `12/31/2013` <dbl>, `1/31/2014` <dbl>,
## #   `2/28/2014` <dbl>, `3/31/2014` <dbl>, `4/30/2014` <dbl>,
## #   `5/31/2014` <dbl>, `6/30/2014` <dbl>, `7/31/2014` <dbl>,
## #   `8/31/2014` <dbl>, `9/30/2014` <dbl>, `10/31/2014` <dbl>,
## #   `11/30/2014` <dbl>, `12/31/2014` <dbl>, `1/31/2015` <dbl>,
## #   `2/28/2015` <dbl>, `3/31/2015` <dbl>, `4/30/2015` <dbl>,
## #   `5/31/2015` <dbl>, `6/30/2015` <dbl>, `7/31/2015` <dbl>,
## #   `8/31/2015` <dbl>, `9/30/2015` <dbl>, `10/31/2015` <dbl>,
## #   `11/30/2015` <dbl>, `12/31/2015` <dbl>, `1/31/2016` <dbl>,
## #   `2/29/2016` <dbl>, `3/31/2016` <dbl>, `4/30/2016` <dbl>,
## #   `5/31/2016` <dbl>, `6/30/2016` <dbl>, `7/31/2016` <dbl>,
## #   `8/31/2016` <dbl>, `9/30/2016` <dbl>, `10/31/2016` <dbl>,
## #   `11/30/2016` <dbl>, `12/31/2016` <dbl>, `1/31/2017` <dbl>,
## #   `2/28/2017` <dbl>, `3/31/2017` <dbl>, `4/30/2017` <dbl>,
## #   `5/31/2017` <dbl>, `6/30/2017` <dbl>, `7/31/2017` <dbl>,
## #   `8/31/2017` <dbl>, `9/30/2017` <dbl>, `10/31/2017` <dbl>,
## #   `11/30/2017` <dbl>, `12/31/2017` <dbl>, `1/31/2018` <dbl>,
## #   `2/28/2018` <dbl>, `3/31/2018` <dbl>, `4/30/2018` <dbl>,
## #   `5/31/2018` <dbl>, `6/30/2018` <dbl>, `7/31/2018` <dbl>,
## #   `8/31/2018` <dbl>, `9/30/2018` <dbl>, `10/31/2018` <dbl>,
## #   `11/30/2018` <dbl>, `12/31/2018` <dbl>
print(EIA_OilPrice)
## # A tibble: 4 x 75
##      CODE     GROUP                                     DESC `1/31/2013`
##     <chr>     <chr>                                    <chr>       <dbl>
## 1 WTIPUUS Crude Oil     West Texas Intermediate Spot Average       94.76
## 2 BREPUUS Crude Oil                       Brent Spot Average      112.96
## 3 RAIMUUS Crude Oil                    U.S. Imported Average       97.91
## 4 RACPUUS Crude Oil    U.S. Refiner Average Acquisition Cost      100.78
## # ... with 71 more variables: `2/28/2013` <dbl>, `3/31/2013` <dbl>,
## #   `4/30/2013` <dbl>, `5/31/2013` <dbl>, `6/30/2013` <dbl>,
## #   `7/31/2013` <dbl>, `8/31/2013` <dbl>, `9/30/2013` <dbl>,
## #   `10/31/2013` <dbl>, `11/30/2013` <dbl>, `12/31/2013` <dbl>,
## #   `1/31/2014` <dbl>, `2/28/2014` <dbl>, `3/31/2014` <dbl>,
## #   `4/30/2014` <dbl>, `5/31/2014` <dbl>, `6/30/2014` <dbl>,
## #   `7/31/2014` <dbl>, `8/31/2014` <dbl>, `9/30/2014` <dbl>,
## #   `10/31/2014` <dbl>, `11/30/2014` <dbl>, `12/31/2014` <dbl>,
## #   `1/31/2015` <dbl>, `2/28/2015` <dbl>, `3/31/2015` <dbl>,
## #   `4/30/2015` <dbl>, `5/31/2015` <dbl>, `6/30/2015` <dbl>,
## #   `7/31/2015` <dbl>, `8/31/2015` <dbl>, `9/30/2015` <dbl>,
## #   `10/31/2015` <dbl>, `11/30/2015` <dbl>, `12/31/2015` <dbl>,
## #   `1/31/2016` <dbl>, `2/29/2016` <dbl>, `3/31/2016` <dbl>,
## #   `4/30/2016` <dbl>, `5/31/2016` <dbl>, `6/30/2016` <dbl>,
## #   `7/31/2016` <dbl>, `8/31/2016` <dbl>, `9/30/2016` <dbl>,
## #   `10/31/2016` <dbl>, `11/30/2016` <dbl>, `12/31/2016` <dbl>,
## #   `1/31/2017` <dbl>, `2/28/2017` <dbl>, `3/31/2017` <dbl>,
## #   `4/30/2017` <dbl>, `5/31/2017` <dbl>, `6/30/2017` <dbl>,
## #   `7/31/2017` <dbl>, `8/31/2017` <dbl>, `9/30/2017` <dbl>,
## #   `10/31/2017` <dbl>, `11/30/2017` <dbl>, `12/31/2017` <dbl>,
## #   `1/31/2018` <dbl>, `2/28/2018` <dbl>, `3/31/2018` <dbl>,
## #   `4/30/2018` <dbl>, `5/31/2018` <dbl>, `6/30/2018` <dbl>,
## #   `7/31/2018` <dbl>, `8/31/2018` <dbl>, `9/30/2018` <dbl>,
## #   `10/31/2018` <dbl>, `11/30/2018` <dbl>, `12/31/2018` <dbl>

Tidy the Oil data

#tidy data
EIA_OilSupply.ByMonth <- EIA_OilSupply %>%
        gather(Month, MilBarrels, 4:75)

EIA_OilConsumption.ByMonth <- EIA_OilConsumption %>%
        gather(Month, MilBarrels, 4:75)

EIA_OilPrice.ByMonth <- EIA_OilPrice %>%
        gather(DT, PerBarrel, 4:75)


head(EIA_OilSupply.ByMonth)
## # A tibble: 6 x 5
##             CODE    GROUP                   DESC     Month MilBarrels
##            <chr>    <chr>                  <chr>     <chr>      <dbl>
## 1        papr_us     OECD       U.S. (50 States) 1/31/2013      11.59
## 2        papr_ca     OECD                 Canada 1/31/2013       4.12
## 3        papr_mx     OECD                 Mexico 1/31/2013       2.96
## 4 papr_otheroecd     OECD             Other OECD 1/31/2013       4.40
## 5      copr_opec Non-OECD OPEC Crude Oil Portion 1/31/2013      30.40
## 6        opec_nc Non-OECD OPEC Other Liquids (b) 1/31/2013       6.43
head(EIA_OilConsumption.ByMonth)
## # A tibble: 6 x 5
##               CODE GROUP             DESC     Month MilBarrels
##              <chr> <chr>            <chr>     <chr>      <dbl>
## 1          patc_us  OECD U.S. (50 States) 1/31/2013      18.75
## 2         patc_ust  OECD U.S. Territories 1/31/2013       0.28
## 3          patc_ca  OECD           Canada 1/31/2013       2.52
## 4 patc_oecd_europe  OECD           Europe 1/31/2013      12.82
## 5          patc_ja  OECD            Japan 1/31/2013       5.08
## 6  patc_other_oecd  OECD       Other OECD 1/31/2013       6.37
head(EIA_OilPrice.ByMonth)
## # A tibble: 6 x 5
##      CODE     GROUP                                     DESC        DT
##     <chr>     <chr>                                    <chr>     <chr>
## 1 WTIPUUS Crude Oil     West Texas Intermediate Spot Average 1/31/2013
## 2 BREPUUS Crude Oil                       Brent Spot Average 1/31/2013
## 3 RAIMUUS Crude Oil                    U.S. Imported Average 1/31/2013
## 4 RACPUUS Crude Oil    U.S. Refiner Average Acquisition Cost 1/31/2013
## 5 WTIPUUS Crude Oil     West Texas Intermediate Spot Average 2/28/2013
## 6 BREPUUS Crude Oil                       Brent Spot Average 2/28/2013
## # ... with 1 more variables: PerBarrel <dbl>

Data above can be merged and summarized. E.g. US import/consumption vs import avg price.

tmp_month <- names(EIA_OilSupply)
Months <- tmp_month[4:75]

US_oil.supply <- EIA_OilSupply.ByMonth %>% filter(DESC == "U.S. (50 States)")
## Warning: package 'bindrcpp' was built under R version 3.4.2
US_oil.Consume_50_States <- EIA_OilConsumption.ByMonth %>% filter(DESC == "U.S. (50 States)")
US_oil.Consume_US_Territories <- EIA_OilConsumption.ByMonth %>% filter(DESC == "U.S. Territories")
US_oil.import <- EIA_OilPrice.ByMonth %>% filter(CODE == "RAIMUUS")

US_oil <- data.frame(
  Record_Months <- Months,
  Supply <- US_oil.supply$MilBarrels,
  Consume <- US_oil.Consume_50_States$MilBarrels + US_oil.Consume_US_Territories$MilBarrels,
  ImportAvgPrice <- US_oil.import$PerBarrel 
  
)

print(US_oil)
##    Record_Months....Months Supply....US_oil.supply.MilBarrels
## 1                1/31/2013                              11.59
## 2                2/28/2013                              11.67
## 3                3/31/2013                              11.83
## 4                4/30/2013                              12.17
## 5                5/31/2013                              12.11
## 6                6/30/2013                              12.11
## 7                7/31/2013                              12.46
## 8                8/31/2013                              12.59
## 9                9/30/2013                              12.88
## 10              10/31/2013                              12.82
## 11              11/30/2013                              13.06
## 12              12/31/2013                              13.09
## 13               1/31/2014                              13.04
## 14               2/28/2014                              13.09
## 15               3/31/2014                              13.31
## 16               4/30/2014                              13.90
## 17               5/31/2014                              13.85
## 18               6/30/2014                              14.26
## 19               7/31/2014                              14.35
## 20               8/31/2014                              14.44
## 21               9/30/2014                              14.53
## 22              10/31/2014                              14.73
## 23              11/30/2014                              14.90
## 24              12/31/2014                              15.13
## 25               1/31/2015                              14.75
## 26               2/28/2015                              14.97
## 27               3/31/2015                              15.06
## 28               4/30/2015                              15.33
## 29               5/31/2015                              15.18
## 30               6/30/2015                              15.03
## 31               7/31/2015                              15.20
## 32               8/31/2015                              15.20
## 33               9/30/2015                              15.20
## 34              10/31/2015                              15.17
## 35              11/30/2015                              15.22
## 36              12/31/2015                              15.10
## 37               1/31/2016                              14.92
## 38               2/29/2016                              14.82
## 39               3/31/2016                              15.01
## 40               4/30/2016                              14.79
## 41               5/31/2016                              14.96
## 42               6/30/2016                              14.79
## 43               7/31/2016                              14.83
## 44               8/31/2016                              14.65
## 45               9/30/2016                              14.45
## 46              10/31/2016                              14.73
## 47              11/30/2016                              14.96
## 48              12/31/2016                              14.69
## 49               1/31/2017                              14.70
## 50               2/28/2017                              15.08
## 51               3/31/2017                              15.28
## 52               4/30/2017                              15.21
## 53               5/31/2017                              15.40
## 54               6/30/2017                              15.41
## 55               7/31/2017                              15.60
## 56               8/31/2017                              15.44
## 57               9/30/2017                              15.49
## 58              10/31/2017                              15.96
## 59              11/30/2017                              16.21
## 60              12/31/2017                              16.25
## 61               1/31/2018                              16.25
## 62               2/28/2018                              16.33
## 63               3/31/2018                              16.48
## 64               4/30/2018                              16.53
## 65               5/31/2018                              16.67
## 66               6/30/2018                              16.61
## 67               7/31/2018                              16.68
## 68               8/31/2018                              16.60
## 69               9/30/2018                              16.52
## 70              10/31/2018                              16.73
## 71              11/30/2018                              17.01
## 72              12/31/2018                              17.05
##    Consume....US_oil.Consume_50_States.MilBarrels...US_oil.Consume_US_Territories.MilBarrels
## 1                                                                                      19.03
## 2                                                                                      18.92
## 3                                                                                      18.81
## 4                                                                                      18.86
## 5                                                                                      19.06
## 6                                                                                      19.09
## 7                                                                                      19.54
## 8                                                                                      19.40
## 9                                                                                      19.53
## 10                                                                                     19.59
## 11                                                                                     19.77
## 12                                                                                     19.26
## 13                                                                                     19.36
## 14                                                                                     19.17
## 15                                                                                     18.72
## 16                                                                                     19.11
## 17                                                                                     18.85
## 18                                                                                     19.15
## 19                                                                                     19.54
## 20                                                                                     19.66
## 21                                                                                     19.51
## 22                                                                                     19.95
## 23                                                                                     19.63
## 24                                                                                     19.72
## 25                                                                                     19.52
## 26                                                                                     19.98
## 27                                                                                     19.65
## 28                                                                                     19.56
## 29                                                                                     19.60
## 30                                                                                     20.14
## 31                                                                                     20.43
## 32                                                                                     20.23
## 33                                                                                     19.72
## 34                                                                                     19.80
## 35                                                                                     19.44
## 36                                                                                     19.90
## 37                                                                                     19.34
## 38                                                                                     19.96
## 39                                                                                     19.90
## 40                                                                                     19.54
## 41                                                                                     19.48
## 42                                                                                     20.08
## 43                                                                                     19.99
## 44                                                                                     20.41
## 45                                                                                     20.14
## 46                                                                                     19.90
## 47                                                                                     19.93
## 48                                                                                     20.26
## 49                                                                                     19.52
## 50                                                                                     19.48
## 51                                                                                     20.32
## 52                                                                                     19.82
## 53                                                                                     20.31
## 54                                                                                     20.78
## 55                                                                                     20.88
## 56                                                                                     20.56
## 57                                                                                     20.29
## 58                                                                                     20.33
## 59                                                                                     20.33
## 60                                                                                     20.59
## 61                                                                                     20.11
## 62                                                                                     20.23
## 63                                                                                     20.48
## 64                                                                                     20.31
## 65                                                                                     20.51
## 66                                                                                     20.91
## 67                                                                                     21.15
## 68                                                                                     21.15
## 69                                                                                     20.81
## 70                                                                                     20.81
## 71                                                                                     20.76
## 72                                                                                     21.00
##    ImportAvgPrice....US_oil.import.PerBarrel
## 1                                      97.91
## 2                                      99.23
## 3                                      99.11
## 4                                      96.45
## 5                                      98.50
## 6                                      97.17
## 7                                     101.56
## 8                                     104.16
## 9                                     103.49
## 10                                     97.84
## 11                                     90.36
## 12                                     90.57
## 13                                     89.71
## 14                                     96.10
## 15                                     97.13
## 16                                     97.33
## 17                                     98.46
## 18                                    100.26
## 19                                     98.75
## 20                                     93.23
## 21                                     89.38
## 22                                     82.75
## 23                                     74.34
## 24                                     57.36
## 25                                     44.74
## 26                                     47.18
## 27                                     47.22
## 28                                     51.62
## 29                                     57.51
## 30                                     58.89
## 31                                     52.42
## 32                                     43.23
## 33                                     41.12
## 34                                     42.03
## 35                                     39.05
## 36                                     33.16
## 37                                     27.48
## 38                                     26.66
## 39                                     32.24
## 40                                     35.90
## 41                                     40.88
## 42                                     44.13
## 43                                     41.48
## 44                                     41.21
## 45                                     40.86
## 46                                     44.76
## 47                                     41.80
## 48                                     46.72
## 49                                     48.12
## 50                                     49.38
## 51                                     46.53
## 52                                     47.47
## 53                                     46.94
## 54                                     43.44
## 55                                     43.13
## 56                                     44.54
## 57                                     45.00
## 58                                     44.00
## 59                                     44.50
## 60                                     44.50
## 61                                     44.50
## 62                                     44.50
## 63                                     44.50
## 64                                     44.50
## 65                                     44.50
## 66                                     44.50
## 67                                     45.50
## 68                                     46.50
## 69                                     47.50
## 70                                     48.50
## 71                                     48.50
## 72                                     49.50

#3 NYPD Motor Vehicle Collisions - Yuen Chun Wong

http://www1.nyc.gov/site/nypd/stats/traffic-data/traffic-data-collision.page It is Aug 2017 data. I saved the data in csv format and merge the column for location.

NYPDCollision <- tbl_df(read.csv("NYPDAug2017_Motor_Vehicle_Collisions.csv", stringsAsFactors = FALSE, check.names = FALSE))
#snapshot of data
head(NYPDCollision)
## # A tibble: 6 x 15
##   GeoCode           Boro_Location  GeoCodeLabel
##     <int>                   <chr>         <chr>
## 1       1  Manhattan 1st Precinct  1st Precinct
## 2       5  Manhattan 5th Precinct  5th Precinct
## 3       6  Manhattan 6th Precinct  6th Precinct
## 4       7  Manhattan 7th Precinct  7th Precinct
## 5       9  Manhattan 9th Precinct  9th Precinct
## 6      10 Manhattan 10th Precinct 10th Precinct
## # ... with 12 more variables: Number_of_Motor_Vehicle_Collisions <int>,
## #   Vehicles_or_Motorists_Involved <int>,
## #   Injury_or_Fatal_Collisions <int>, MotoristsInjured <int>,
## #   MotoristsKilled <int>, PassengInjured <int>, PassengKilled <int>,
## #   CyclistsInjured <int>, CyclistsKilled <int>, PedestrInjured <int>,
## #   PedestrKilled <int>, Bicycle <int>
NYPDCollision.ByInjuryType <- NYPDCollision %>% gather(InJury_type, Freq, 4:15)

Analysis: Min & Max by Injury Type modified from https://stackoverflow.com/questions/31284794/how-do-i-use-group-by-with-order-function-in-r https://stackoverflow.com/questions/31528981/select-first-and-last-row-from-grouped-data

NYPDCollision.ByInjuryType_Max <- NYPDCollision.ByInjuryType %>% group_by(InJury_type) %>% arrange(-Freq) %>%
  filter(row_number()==1)

NYPDCollision.ByInjuryType_Min <- NYPDCollision.ByInjuryType %>% group_by(InJury_type) %>% arrange(Freq) %>%
  filter(row_number()==1)

(NYPDCollision.ByInjuryType_Max)
## # A tibble: 12 x 5
## # Groups:   InJury_type [12]
##    GeoCode          Boro_Location   GeoCodeLabel
##      <int>                  <chr>          <chr>
##  1     109  Queens 109th Precinct 109th Precinct
##  2     109  Queens 109th Precinct 109th Precinct
##  3     105  Queens 105th Precinct 105th Precinct
##  4     105  Queens 105th Precinct 105th Precinct
##  5     105  Queens 105th Precinct 105th Precinct
##  6      75 Brooklyn 75th Precinct  75th Precinct
##  7      90 Brooklyn 90th Precinct  90th Precinct
##  8      90 Brooklyn 90th Precinct  90th Precinct
##  9      47    Bronx 47th Precinct  47th Precinct
## 10     112  Queens 112th Precinct 112th Precinct
## 11      45    Bronx 45th Precinct  45th Precinct
## 12      66 Brooklyn 66th Precinct  66th Precinct
## # ... with 2 more variables: InJury_type <chr>, Freq <int>
(NYPDCollision.ByInjuryType_Min)
## # A tibble: 12 x 5
## # Groups:   InJury_type [12]
##    GeoCode                   Boro_Location          GeoCodeLabel
##      <int>                           <chr>                 <chr>
##  1      22 Manhattan Central Park Precinct Central Park Precinct
##  2       1          Manhattan 1st Precinct          1st Precinct
##  3      22 Manhattan Central Park Precinct Central Park Precinct
##  4       1          Manhattan 1st Precinct          1st Precinct
##  5      22 Manhattan Central Park Precinct Central Park Precinct
##  6       1          Manhattan 1st Precinct          1st Precinct
##  7       5          Manhattan 5th Precinct          5th Precinct
##  8      22 Manhattan Central Park Precinct Central Park Precinct
##  9      22 Manhattan Central Park Precinct Central Park Precinct
## 10      30         Manhattan 30th Precinct         30th Precinct
## 11      22 Manhattan Central Park Precinct Central Park Precinct
## 12      22 Manhattan Central Park Precinct Central Park Precinct
## # ... with 2 more variables: InJury_type <chr>, Freq <int>

As data above, We can conclude: 1. Queens 109th Precinct has highest collison rate while Manhattan Central Park Precinct has lowerest collison rate.
2. More Traffic police and road design may be required in Queens 109th Precinct. 3. Few vehicles are Manhattan Central Park Precinct area.