Deepa 607 Week 1

Questions:

  Try to improve on the code in Diana’s approach for determining the top 5 agencies for each year, and
  Try to improve on her plot that shows “which agencies were involved with the 311 calls each year (limiting it to only the top 5 agencies per year).”

## This is to pull csv into R dataframe
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(magrittr)

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

# Read the Sandy-related 311 call counts (a date column plus one numeric
# column per agency and a total) straight from the GitHub-hosted CSV.
sandy_data <- read_csv(
  file = "https://raw.githubusercontent.com/dianaplunkett/607/main/sandy-311-calls-by-day.csv"
)

## Rows: 1783 Columns: 25

## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (1): date
## dbl (24): NYC-3-1-1, ACS, BPSI, CAU, CHALL, DEP, DOB, DOE, DOF, DOHMH, DPR, ...

## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Preview the first six rows of the freshly loaded data.
head(sandy_data, n = 6L)

## # A tibble: 6 x 25
##   date   `NYC-3-1-1`   ACS  BPSI   CAU CHALL   DEP   DOB   DOE   DOF DOHMH   DPR
##   <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 10/22~           0     0     0     0     0     0     0     1     0     0     0
## 2 10/23~           0     0     0     0     0     0     0     1     0     0     0
## 3 10/24~           0     0     0     0     0     0     0     1     0     0     0
## 4 10/25~           0     0     0     0     0     0     0     4     0     0     0
## 5 10/26~           0     0     0     0     0     0     0    36     0     0     0
## 6 10/27~         207     0     0     0     0     0     0   312     0     0     0
## # ... with 13 more variables: FEMA <dbl>, HPD <dbl>, HRA <dbl>, MFANYC <dbl>,
## #   MOSE <dbl>, NYCEM <dbl>, NYCHA <dbl>, NYCSERVICE <dbl>, NYPD <dbl>,
## #   NYSDOL <dbl>, SBS <dbl>, NYSEMERGENCYMG <dbl>, total <dbl>

##glimpse(sandy_data)

This is to break date column into Month, Day and Year columns

library(tidyr)
# Split the "month/day/year" date string on "/" into three character columns;
# remove = FALSE keeps the original date column alongside the new ones.
sandy_data <- sandy_data %>%
  tidyr::separate(
    col = date,
    into = c("Month", "Day", "Year"),
    sep = "/",
    remove = FALSE
  )
head(sandy_data, n = 6L)

## # A tibble: 6 x 28
##   date   Month Day   Year  `NYC-3-1-1`   ACS  BPSI   CAU CHALL   DEP   DOB   DOE
##   <chr>  <chr> <chr> <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 10/22~ 10    22    12              0     0     0     0     0     0     0     1
## 2 10/23~ 10    23    12              0     0     0     0     0     0     0     1
## 3 10/24~ 10    24    12              0     0     0     0     0     0     0     1
## 4 10/25~ 10    25    12              0     0     0     0     0     0     0     4
## 5 10/26~ 10    26    12              0     0     0     0     0     0     0    36
## 6 10/27~ 10    27    12            207     0     0     0     0     0     0   312
## # ... with 16 more variables: DOF <dbl>, DOHMH <dbl>, DPR <dbl>, FEMA <dbl>,
## #   HPD <dbl>, HRA <dbl>, MFANYC <dbl>, MOSE <dbl>, NYCEM <dbl>, NYCHA <dbl>,
## #   NYCSERVICE <dbl>, NYPD <dbl>, NYSDOL <dbl>, SBS <dbl>,
## #   NYSEMERGENCYMG <dbl>, total <dbl>

## This is to pull required columns only (I removed the date, Month, Day and total columns)

library(tidyr)
# Keep Year plus the per-agency count columns, in their existing order.
# The date, Month, Day and total columns are left out.
sandy_data <- sandy_data %>%
  dplyr::select(all_of(c(
    "Year", "NYC-3-1-1", "ACS", "BPSI", "CAU", "CHALL", "DEP", "DOB",
    "DOE", "DOF", "DOHMH", "DPR", "FEMA", "HPD", "HRA", "MFANYC", "MOSE",
    "NYCEM", "NYCHA", "NYCSERVICE", "NYPD", "NYSDOL", "SBS",
    "NYSEMERGENCYMG"
  )))

head(sandy_data, n = 6L)

## # A tibble: 6 x 24
##   Year  `NYC-3-1-1`   ACS  BPSI   CAU CHALL   DEP   DOB   DOE   DOF DOHMH   DPR
##   <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 12              0     0     0     0     0     0     0     1     0     0     0
## 2 12              0     0     0     0     0     0     0     1     0     0     0
## 3 12              0     0     0     0     0     0     0     1     0     0     0
## 4 12              0     0     0     0     0     0     0     4     0     0     0
## 5 12              0     0     0     0     0     0     0    36     0     0     0
## 6 12            207     0     0     0     0     0     0   312     0     0     0
## # ... with 12 more variables: FEMA <dbl>, HPD <dbl>, HRA <dbl>, MFANYC <dbl>,
## #   MOSE <dbl>, NYCEM <dbl>, NYCHA <dbl>, NYCSERVICE <dbl>, NYPD <dbl>,
## #   NYSDOL <dbl>, SBS <dbl>, NYSEMERGENCYMG <dbl>

This is to update the NYC-3-1-1 column name into NYC311 just to keep it clean and consistent

# Rename the single non-syntactic column by name. Reassigning all 24 column
# names by position would silently mislabel columns if their order ever
# changed; renaming by name fails loudly if "NYC-3-1-1" is missing.
sandy_data <- dplyr::rename(sandy_data, NYC311 = `NYC-3-1-1`)

head(sandy_data)

## # A tibble: 6 x 24
##   Year  NYC311   ACS  BPSI   CAU CHALL   DEP   DOB   DOE   DOF DOHMH   DPR  FEMA
##   <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 12         0     0     0     0     0     0     0     1     0     0     0     0
## 2 12         0     0     0     0     0     0     0     1     0     0     0     0
## 3 12         0     0     0     0     0     0     0     1     0     0     0     0
## 4 12         0     0     0     0     0     0     0     4     0     0     0     0
## 5 12         0     0     0     0     0     0     0    36     0     0     0     0
## 6 12       207     0     0     0     0     0     0   312     0     0     0     0
## # ... with 11 more variables: HPD <dbl>, HRA <dbl>, MFANYC <dbl>, MOSE <dbl>,
## #   NYCEM <dbl>, NYCHA <dbl>, NYCSERVICE <dbl>, NYPD <dbl>, NYSDOL <dbl>,
## #   SBS <dbl>, NYSEMERGENCYMG <dbl>

This summarizes the data by year, adding up each agency's calls within each year. The result is assigned with <- to a new data frame named data; sandy_data itself is left unchanged.

# Total every numeric (agency) column within each Year, ignoring missing
# values. across(where(...)) replaces the superseded summarise_if(), and
# .groups = "drop" makes it explicit that the result is ungrouped.
data <- sandy_data %>%
  group_by(Year) %>%
  summarise(
    across(where(is.numeric), ~ sum(.x, na.rm = TRUE)),
    .groups = "drop"
  )

data

## # A tibble: 6 x 24
##   Year  NYC311   ACS  BPSI   CAU CHALL   DEP   DOB   DOE   DOF DOHMH   DPR  FEMA
##   <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 12     28295    30   152  1371  6925    60  1934 12597  1076   197   572     0
## 2 13         0    22    19    38  3519   108   844    53  1821   348     0     0
## 3 14         0     0     0     0   196    26    91     0   472    52     0     0
## 4 15         1     0     0     0     8    15    57     0   170    10     0    10
## 5 16         2     0     0     0    16     0    39     0    46     8     0     0
## 6 17         1     0     0     0     7     0    20     0    14    16     0     0
## # ... with 11 more variables: HPD <dbl>, HRA <dbl>, MFANYC <dbl>, MOSE <dbl>,
## #   NYCEM <dbl>, NYCHA <dbl>, NYCSERVICE <dbl>, NYPD <dbl>, NYSDOL <dbl>,
## #   SBS <dbl>, NYSEMERGENCYMG <dbl>

This pulls the agency with the highest call volume in each year (one agency per year, in year order)

# Name of the agency with the largest yearly total, one entry per row (year).
# Only the numeric agency columns are searched: running apply() over the whole
# tibble would coerce every value to character because of the Year column, and
# Year itself would then compete with the call counts inside which.max().
colnames(data)[-1][apply(as.matrix(data[, -1]), 1, which.max)]

## [1] "NYC311"         "CHALL"          "NYSEMERGENCYMG" "NYSEMERGENCYMG"
## [5] "NYSEMERGENCYMG" "NYSEMERGENCYMG"

## This code previously produced warnings, so I wrapped it in suppressWarnings(). It now runs without any, and I am not sure what changed.
##suppressWarnings(colnames(data)[apply(data,1,which.max)])

This is to view number of calls the top agencies received in each year

##install.packages("matrixStats")
library(matrixStats)

## 
## Attaching package: 'matrixStats'

## The following object is masked from 'package:dplyr':
## 
##     count

# Largest yearly call total across agencies, one value per row (year).
# data[, -1] drops the Year column and keeps every agency column, so nothing
# depends on a hard-coded list of column positions.
data1 <- rowMaxs(as.matrix(data[, -1]))
data1

## [1] 28295  3519   519   271   120    69

This is to transpose the columns and rows from the data dataframe

# Transpose the yearly totals so that agencies become rows and years columns.
# t() on the numeric agency columns keeps the agency names as row names.
tdata <- data.frame(t(data[, -1]))

# Build the four-digit year labels from the Year column instead of hard-coding
# them; Year holds two-digit strings such as "12", so prefix the century.
colnames(tdata) <- paste0("20", data[["Year"]])

head(tdata)

##         2012 2013 2014 2015 2016 2017
## NYC311 28295    0    0    1    2    1
## ACS       30   22    0    0    0    0
## BPSI     152   19    0    0    0    0
## CAU     1371   38    0    0    0    0
## CHALL   6925 3519  196    8   16    7
## DEP       60  108   26   15    0    0

# Show the full transposed table (agencies as rows, years as columns).
print(tdata)

##                 2012 2013 2014 2015 2016 2017
## NYC311         28295    0    0    1    2    1
## ACS               30   22    0    0    0    0
## BPSI             152   19    0    0    0    0
## CAU             1371   38    0    0    0    0
## CHALL           6925 3519  196    8   16    7
## DEP               60  108   26   15    0    0
## DOB             1934  844   91   57   39   20
## DOE            12597   53    0    0    0    0
## DOF             1076 1821  472  170   46   14
## DOHMH            197  348   52   10    8   16
## DPR              572    0    0    0    0    0
## FEMA               0    0    0   10    0    0
## HPD               85 1382    0    0    0    0
## HRA             1380    0    0    0    0    0
## MFANYC          1164    0    0    0    0    0
## MOSE              70  164    9    0    0    0
## NYCEM           1229  149   24    0    0    0
## NYCHA            135   57    0    0    0    0
## NYCSERVICE       473  932   40    0    0    0
## NYPD              91  115   35   21   18   12
## NYSDOL          4490  638   16    0    0    0
## SBS              524  715   47    8    0    3
## NYSEMERGENCYMG     0 1895  519  271  120   69

Deepa 607 Week 1

Deepa

3/02/2022

Overview

Questions:

This is to break date column into Month, Day and Year columns

This is to update the NYC-3-1-1 column name into NYC311 just to keep it clean and consistent

This is to summarize the data in the dataframe by year. The <- will update the dataframe with the new summarized data

This is to pull the agencies that have the highest call volumes in each year

This is to view number of calls the top agencies received in each year

This is to transpose the columns and rows from the data dataframe

Conclusion: