# Load required packages
library(XML)
library(RCurl)
## Loading required package: bitops
library(xlsx)
## Loading required package: rJava
## 
## Attaching package: 'rJava'
## The following object is masked from 'package:RCurl':
## 
##     clone
## Loading required package: xlsxjars
library(rvest)
## Loading required package: xml2
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:XML':
## 
##     xml
library(stringr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:RCurl':
## 
##     complete
library(ggplot2)
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.4.3
## Loading required package: RColorBrewer

Data Source

Data downloaded from NYC government open data.

311 Service Requests from 2010 to Present: https://data.cityofnewyork.us/dataset/311-Service-Requests-From-2011/fpz8-jqf4 ; Median Household Income 2017 by zip code: http://zipatlas.com/us/ny/zip-code-comparison/median-household-income.htm ; NYC Weather (search "New York, NY" as the area of interest): https://www.ncdc.noaa.gov/cdo-web/confirmation

Transformed data- CleanZipCodePerDayComplaintFreq.txt

# Goal: build a table that holds daily weather and complaint types by zip code.

# Load the raw inputs into R. Both files are comma-separated with a header
# row, which is what read.csv() assumes by default.

# 311 call data (the file name suggests per-day, per-zip-code complaint
# frequencies).
raw_call311_2013to2017 <- read.csv(
  "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/CleanZipCodePerDayComplaintFreq.txt"
)

# New York zip code reference table.
raw_nyc_zipcode <- read.csv(
  "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/Zipcode_tidy.csv"
)

Clean Zip Codes

# Clean Zip Code: take the Zip.Code column of the New York zip code table.
# This line only extracts the vector of zip codes; the 311 calls themselves
# are filtered by incident zip code later, one zip code at a time.
zipCode <- raw_nyc_zipcode$Zip.Code

Total Complaint Types Word Cloud

# Total frequency of every complaint type over the whole data set.
# Rows containing any missing value are discarded first, and the "TAVG"
# (average temperature) rows are left out because they are not complaints.
word_count <- summarize(
  group_by(
    filter(drop_na(raw_call311_2013to2017), Type != "TAVG"),
    Type
  ),
  freq = sum(Freq)
)

# Fix the seed so the word placement is the same on every run.
set.seed(1234)
wordcloud(
  words = word_count$Type,
  freq = word_count$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.1
)
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Noise - Residential could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : HEAT/HOT WATER could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : General Construction/Plumbing could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : DOF Property - Property Value could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Beach/Pool/Sauna Complaint could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Home Delivered Meal - Missed Delivery could not be fit on
## page. It will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Unsanitary Animal Facility could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Advocate-Personal Exemptions could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Municipal Parking Facility could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Home Delivered Meal Complaint could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Special Natural Area District (SNAD) could not be fit on
## page. It will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Animal Facility - No Permit could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Illegal Animal Sold could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : DOF Property - City Rebate could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Overflowing Recycling Baskets could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Home Care Provider Complaint could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Case Management Agency Complaint could not be fit on page.
## It will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Bereavement Support Group could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : DHS Advantage - Tenant could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Sprinkler - Mechanical could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Rangehood could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Transportation Provider Complaint could not be fit on page.
## It will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Calorie Labeling could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Legal Services Provider Complaint could not be fit on page.
## It will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : X-Ray Machine/Equipment could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : DHS Advantage -Landlord/Broker could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Highway Sign - Damaged could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Advocate-Co-opCondo Abatement could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : OEM Disabled Vehicle could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Hazmat Storage/Use could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Highway Sign - Missing could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Advocate-Prop Refunds/Credits could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Radioactive Material could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Cooling Tower could not be fit on page. It will not be
## plotted.

Top 20 complaint types from 2011 to 2017

# Read the long-form table of complaint counts into R.
# Columns used below: Type (complaint type) and SubTotal (count).
top20types <- read.csv(file="D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/combine.txt", header=TRUE, sep=",")

# Overall total per type, keeping the 20 largest totals.
# top_n() keeps ties, so more than 20 rows can come back.
# NOTE(review): the wide form of this table has TAVG and SNOW columns, so
# weather rows may be ranked here alongside real complaint types -- confirm
# whether they should be filtered out first.
top20Total <- top20types %>%
  select(Type, SubTotal) %>%
  drop_na() %>%
  group_by(Type) %>%
  summarize(Total = sum(SubTotal)) %>%
  arrange(-Total) %>%
  top_n(n = 20, Total)

# Bar chart of the totals.
# Columns are referenced by bare name inside aes(): writing top20Total$Type
# bypasses the data argument and breaks as soon as the plot is faceted or
# given different data. The former colour mapping to Total was dropped
# because the constant colour set on geom_bar() overrode it anyway.
ggplot(top20Total, aes(x = Type, y = Total)) +
  geom_bar(stat = "identity", fill = "lightblue", color = "lightblue") +
  xlab("Complaint Types") +
  ylab("Frequency") +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.2, vjust = 0.2))

General correlation analysis for complaint types

# Reshape from long form (one row per Type) to wide form (one column per
# Type, filled with SubTotal).
wide_top20types <- spread(top20types, key = Type, value = SubTotal)

# Keep columns 2 to 23 only, i.e. drop the leading non-type column.
wide_top20types <- wide_top20types[, seq(2, 23)]

# Pairwise correlation matrix, computed on rows with no missing value.
res <- cor(wide_top20types, use = "complete.obs")
round(res, digits = 2)
##                               Blocked Driveway Damaged Tree
## Blocked Driveway                          1.00        -0.27
## Damaged Tree                             -0.27         1.00
## Derelict Vehicles                        -0.23         0.43
## Dirty Conditions                         -0.25         0.33
## DOF Literature Request                    0.08         0.07
## ELECTRIC                                  0.00         0.33
## General Construction/Plumbing             0.03         0.33
## HEAT/HOT WATER                            0.32        -0.37
## Illegal Parking                           0.35         0.24
## Noise - Commercial                        0.09        -0.39
## Noise - Residential                       0.05        -0.39
## Noise - Street/Sidewalk                  -0.32         0.08
## NONCONST                                 -0.24         0.52
## PAINT - PLASTER                           0.05         0.34
## Sewer                                     0.02         0.34
## SNOW                                      0.24        -0.01
## Street Condition                          0.37         0.01
## Street Light Condition                   -0.10         0.36
## TAVG                                     -0.55         0.54
## Traffic Signal Condition                  0.26         0.16
## Unsanitation Condition                   -0.46         0.56
## Water System                             -0.12         0.20
##                               Derelict Vehicles Dirty Conditions
## Blocked Driveway                          -0.23            -0.25
## Damaged Tree                               0.43             0.33
## Derelict Vehicles                          1.00             0.54
## Dirty Conditions                           0.54             1.00
## DOF Literature Request                     0.31             0.31
## ELECTRIC                                   0.56             0.41
## General Construction/Plumbing              0.60             0.40
## HEAT/HOT WATER                            -0.23            -0.17
## Illegal Parking                            0.50             0.22
## Noise - Commercial                        -0.56            -0.42
## Noise - Residential                       -0.63            -0.46
## Noise - Street/Sidewalk                   -0.20            -0.13
## NONCONST                                   0.71             0.51
## PAINT - PLASTER                            0.58             0.39
## Sewer                                      0.16             0.09
## SNOW                                      -0.21            -0.16
## Street Condition                           0.45             0.25
## Street Light Condition                     0.67             0.49
## TAVG                                       0.23             0.22
## Traffic Signal Condition                   0.16             0.14
## Unsanitation Condition                     0.73             0.56
## Water System                               0.21             0.21
##                               DOF Literature Request ELECTRIC
## Blocked Driveway                                0.08     0.00
## Damaged Tree                                    0.07     0.33
## Derelict Vehicles                               0.31     0.56
## Dirty Conditions                                0.31     0.41
## DOF Literature Request                          1.00     0.47
## ELECTRIC                                        0.47     1.00
## General Construction/Plumbing                   0.56     0.88
## HEAT/HOT WATER                                  0.28     0.23
## Illegal Parking                                 0.18     0.40
## Noise - Commercial                             -0.46    -0.77
## Noise - Residential                            -0.55    -0.83
## Noise - Street/Sidewalk                        -0.39    -0.51
## NONCONST                                        0.46     0.77
## PAINT - PLASTER                                 0.53     0.87
## Sewer                                           0.04     0.30
## SNOW                                            0.05     0.14
## Street Condition                                0.11     0.40
## Street Light Condition                          0.59     0.70
## TAVG                                           -0.23    -0.07
## Traffic Signal Condition                        0.51     0.61
## Unsanitation Condition                          0.18     0.43
## Water System                                   -0.03     0.41
##                               General Construction/Plumbing HEAT/HOT WATER
## Blocked Driveway                                       0.03           0.32
## Damaged Tree                                           0.33          -0.37
## Derelict Vehicles                                      0.60          -0.23
## Dirty Conditions                                       0.40          -0.17
## DOF Literature Request                                 0.56           0.28
## ELECTRIC                                               0.88           0.23
## General Construction/Plumbing                          1.00           0.32
## HEAT/HOT WATER                                         0.32           1.00
## Illegal Parking                                        0.42          -0.15
## Noise - Commercial                                    -0.76          -0.12
## Noise - Residential                                   -0.85          -0.19
## Noise - Street/Sidewalk                               -0.57          -0.53
## NONCONST                                               0.79          -0.09
## PAINT - PLASTER                                        0.96           0.26
## Sewer                                                  0.37          -0.06
## SNOW                                                   0.09           0.24
## Street Condition                                       0.49           0.14
## Street Light Condition                                 0.76           0.06
## TAVG                                                  -0.20          -0.80
## Traffic Signal Condition                               0.69           0.51
## Unsanitation Condition                                 0.41          -0.47
## Water System                                           0.24          -0.04
##                               Illegal Parking Noise - Commercial
## Blocked Driveway                         0.35               0.09
## Damaged Tree                             0.24              -0.39
## Derelict Vehicles                        0.50              -0.56
## Dirty Conditions                         0.22              -0.42
## DOF Literature Request                   0.18              -0.46
## ELECTRIC                                 0.40              -0.77
## General Construction/Plumbing            0.42              -0.76
## HEAT/HOT WATER                          -0.15              -0.12
## Illegal Parking                          1.00              -0.30
## Noise - Commercial                      -0.30               1.00
## Noise - Residential                     -0.33               0.82
## Noise - Street/Sidewalk                 -0.10               0.45
## NONCONST                                 0.38              -0.74
## PAINT - PLASTER                          0.42              -0.75
## Sewer                                    0.19              -0.29
## SNOW                                    -0.06              -0.11
## Street Condition                         0.52              -0.35
## Street Light Condition                   0.41              -0.64
## TAVG                                     0.07              -0.03
## Traffic Signal Condition                 0.24              -0.53
## Unsanitation Condition                   0.33              -0.47
## Water System                             0.17              -0.33
##                               Noise - Residential Noise - Street/Sidewalk
## Blocked Driveway                             0.05                   -0.32
## Damaged Tree                                -0.39                    0.08
## Derelict Vehicles                           -0.63                   -0.20
## Dirty Conditions                            -0.46                   -0.13
## DOF Literature Request                      -0.55                   -0.39
## ELECTRIC                                    -0.83                   -0.51
## General Construction/Plumbing               -0.85                   -0.57
## HEAT/HOT WATER                              -0.19                   -0.53
## Illegal Parking                             -0.33                   -0.10
## Noise - Commercial                           0.82                    0.45
## Noise - Residential                          1.00                    0.65
## Noise - Street/Sidewalk                      0.65                    1.00
## NONCONST                                    -0.80                   -0.28
## PAINT - PLASTER                             -0.83                   -0.54
## Sewer                                       -0.31                   -0.23
## SNOW                                        -0.07                   -0.17
## Street Condition                            -0.35                   -0.40
## Street Light Condition                      -0.78                   -0.41
## TAVG                                         0.08                    0.64
## Traffic Signal Condition                    -0.57                   -0.52
## Unsanitation Condition                      -0.49                    0.10
## Water System                                -0.30                   -0.04
##                               NONCONST PAINT - PLASTER Sewer  SNOW
## Blocked Driveway                 -0.24            0.05  0.02  0.24
## Damaged Tree                      0.52            0.34  0.34 -0.01
## Derelict Vehicles                 0.71            0.58  0.16 -0.21
## Dirty Conditions                  0.51            0.39  0.09 -0.16
## DOF Literature Request            0.46            0.53  0.04  0.05
## ELECTRIC                          0.77            0.87  0.30  0.14
## General Construction/Plumbing     0.79            0.96  0.37  0.09
## HEAT/HOT WATER                   -0.09            0.26 -0.06  0.24
## Illegal Parking                   0.38            0.42  0.19 -0.06
## Noise - Commercial               -0.74           -0.75 -0.29 -0.11
## Noise - Residential              -0.80           -0.83 -0.31 -0.07
## Noise - Street/Sidewalk          -0.28           -0.54 -0.23 -0.17
## NONCONST                          1.00            0.79  0.23 -0.05
## PAINT - PLASTER                   0.79            1.00  0.36  0.12
## Sewer                             0.23            0.36  1.00  0.15
## SNOW                             -0.05            0.12  0.15  1.00
## Street Condition                  0.25            0.48  0.21 -0.03
## Street Light Condition            0.71            0.73  0.25 -0.03
## TAVG                              0.27           -0.17  0.03 -0.25
## Traffic Signal Condition          0.37            0.65  0.43  0.30
## Unsanitation Condition            0.73            0.40  0.08 -0.26
## Water System                      0.28            0.22  0.00 -0.05
##                               Street Condition Street Light Condition
## Blocked Driveway                          0.37                  -0.10
## Damaged Tree                              0.01                   0.36
## Derelict Vehicles                         0.45                   0.67
## Dirty Conditions                          0.25                   0.49
## DOF Literature Request                    0.11                   0.59
## ELECTRIC                                  0.40                   0.70
## General Construction/Plumbing             0.49                   0.76
## HEAT/HOT WATER                            0.14                   0.06
## Illegal Parking                           0.52                   0.41
## Noise - Commercial                       -0.35                  -0.64
## Noise - Residential                      -0.35                  -0.78
## Noise - Street/Sidewalk                  -0.40                  -0.41
## NONCONST                                  0.25                   0.71
## PAINT - PLASTER                           0.48                   0.73
## Sewer                                     0.21                   0.25
## SNOW                                     -0.03                  -0.03
## Street Condition                          1.00                   0.37
## Street Light Condition                    0.37                   1.00
## TAVG                                     -0.32                  -0.02
## Traffic Signal Condition                  0.34                   0.50
## Unsanitation Condition                    0.09                   0.50
## Water System                              0.14                   0.14
##                                TAVG Traffic Signal Condition
## Blocked Driveway              -0.55                     0.26
## Damaged Tree                   0.54                     0.16
## Derelict Vehicles              0.23                     0.16
## Dirty Conditions               0.22                     0.14
## DOF Literature Request        -0.23                     0.51
## ELECTRIC                      -0.07                     0.61
## General Construction/Plumbing -0.20                     0.69
## HEAT/HOT WATER                -0.80                     0.51
## Illegal Parking                0.07                     0.24
## Noise - Commercial            -0.03                    -0.53
## Noise - Residential            0.08                    -0.57
## Noise - Street/Sidewalk        0.64                    -0.52
## NONCONST                       0.27                     0.37
## PAINT - PLASTER               -0.17                     0.65
## Sewer                          0.03                     0.43
## SNOW                          -0.25                     0.30
## Street Condition              -0.32                     0.34
## Street Light Condition        -0.02                     0.50
## TAVG                           1.00                    -0.39
## Traffic Signal Condition      -0.39                     1.00
## Unsanitation Condition         0.61                    -0.05
## Water System                   0.27                     0.11
##                               Unsanitation Condition Water System
## Blocked Driveway                               -0.46        -0.12
## Damaged Tree                                    0.56         0.20
## Derelict Vehicles                               0.73         0.21
## Dirty Conditions                                0.56         0.21
## DOF Literature Request                          0.18        -0.03
## ELECTRIC                                        0.43         0.41
## General Construction/Plumbing                   0.41         0.24
## HEAT/HOT WATER                                 -0.47        -0.04
## Illegal Parking                                 0.33         0.17
## Noise - Commercial                             -0.47        -0.33
## Noise - Residential                            -0.49        -0.30
## Noise - Street/Sidewalk                         0.10        -0.04
## NONCONST                                        0.73         0.28
## PAINT - PLASTER                                 0.40         0.22
## Sewer                                           0.08         0.00
## SNOW                                           -0.26        -0.05
## Street Condition                                0.09         0.14
## Street Light Condition                          0.50         0.14
## TAVG                                            0.61         0.27
## Traffic Signal Condition                       -0.05         0.11
## Unsanitation Condition                          1.00         0.27
## Water System                                    0.27         1.00
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.4.3
## corrplot 0.84 loaded
# Draw the upper triangle of the correlation matrix, with the variables
# reordered by hierarchical clustering.
corrplot(corr = res, order = "hclust", type = "upper")

#Check data 
#save correlation matrix to long form
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Long form of the correlation matrix: one row per pair of variables.
long_res <- reshape2::melt(res)

# List the variables available in the wide table.
colnames(wide_top20types)
##  [1] "Blocked Driveway"              "Damaged Tree"                 
##  [3] "Derelict Vehicles"             "Dirty Conditions"             
##  [5] "DOF Literature Request"        "ELECTRIC"                     
##  [7] "General Construction/Plumbing" "HEAT/HOT WATER"               
##  [9] "Illegal Parking"               "Noise - Commercial"           
## [11] "Noise - Residential"           "Noise - Street/Sidewalk"      
## [13] "NONCONST"                      "PAINT - PLASTER"              
## [15] "Sewer"                         "SNOW"                         
## [17] "Street Condition"              "Street Light Condition"       
## [19] "TAVG"                          "Traffic Signal Condition"     
## [21] "Unsanitation Condition"        "Water System"
# Extract one pair of variables from the wide table, keep the rows where
# both are present, and export them for a closer look.
# The columns are selected by name (previously positions 17 and 3): a
# positional index silently picks different variables if the set of complaint
# types changes, whereas a missing name fails loudly.
x <- wide_top20types[, c("Street Condition", "Derelict Vehicles")]
a <- x[complete.cases(x), ]
names(a)
## [1] "Street Condition"  "Derelict Vehicles"
summary(a)
##  Street Condition Derelict Vehicles
##  Min.   :  15.0   Min.   :  4.0    
##  1st Qu.: 149.0   1st Qu.: 58.0    
##  Median : 238.0   Median : 85.0    
##  Mean   : 263.6   Mean   :101.9    
##  3rd Qu.: 331.0   3rd Qu.:135.0    
##  Max.   :1387.0   Max.   :317.0
write.csv(a, "generalCorrelationCompType.csv")

Correlation of average temperature (TAVG) vs top 10 complaint types per zip code

library(dplyr)

# Zip codes left out because they do not give a good estimate of the
# top 10 correlations.
remove <- c(10471, 10475, 10464, 11239, 10020, 10004, 10005, 10006, 10007,
            10280, 10044, 11363, 11359, 11360, 11104, 11693, 11694, 11695,
            11697, 11004, 11005, 11411, 11426, 11427, 11428, 10307, 10314)
zipCode <- zipCode[!zipCode %in% remove]

# Loop-invariant preparation: keep the four columns that are needed and drop
# incomplete rows once, instead of redoing it on the full table for every
# zip code.
calls_complete <- raw_call311_2013to2017 %>%
  select(Incident.Zip, CreatedDate, Type, Freq) %>%
  drop_na()

# One correlation matrix per zip code, stored in the same order as zipCode.
# The list is preallocated so it is not grown inside the loop.
df_matrix_corr_per_zip <- vector("list", length(zipCode))

# seq_along() makes the loop a no-op for an empty zipCode vector, where
# 1:length(zipCode) would iterate over c(1, 0).
for (i in seq_along(zipCode)) {
  # Rows for this zip code. Freq is summed per day and type because the same
  # complaint type can appear more than once per day after the conversion.
  data_by_zip <- calls_complete %>%
    filter(Incident.Zip == zipCode[i]) %>%
    group_by(Incident.Zip, CreatedDate, Type) %>%
    summarize(Freq = sum(Freq)) %>%
    ungroup()

  # The 10 types with the largest total frequency (top_n() keeps ties).
  # NOTE(review): the ranking runs over every Type value, including the TAVG
  # and SNOW weather rows, so a weather series can take one of the ten
  # slots -- confirm this is intended.
  data_by_zip_total <- data_by_zip %>%
    group_by(Type) %>%
    summarize(Total = sum(Freq)) %>%
    top_n(n = 10, Total)

  # Keep the top types and add the weather series back in.
  Comptype <- as.character(data_by_zip_total$Type)

  # Long form -> wide form (one column per type), keep only the days on which
  # every selected column has a value, then drop the two identifier columns
  # by name so that only the numeric series remain.
  data_by_zip <- data_by_zip %>%
    filter(Type %in% c(Comptype, "TAVG", "SNOW")) %>%
    spread(Type, Freq) %>%
    drop_na() %>%
    select(-Incident.Zip, -CreatedDate)

  # Correlation matrix for this zip code. cor() warns "the standard deviation
  # is zero" when a column is constant; those entries come back as NA.
  cor_data_by_zip <- cor(data_by_zip, use = "complete.obs")

  # Save the matrix at the position of its zip code.
  df_matrix_corr_per_zip[[i]] <- cor_data_by_zip
}
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero

## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
 # Export every zip code's correlation matrix to corr.csv in long form
 # (one row per variable pair, tagged with its zip code) for interpretation.
 #
 # The header row is written once, and only when corr.csv does not exist yet.
 # Previously each iteration appended with the default col.names = TRUE, which
 # scattered a header line before every zip code's rows and raised an
 # "appending column names to file" warning on every pass.
 corr_file <- "corr.csv"
 write_header <- !file.exists(corr_file)

 # seq_along() yields an empty sequence when zipCode is empty, whereas
 # 1:length(zipCode) would iterate over c(1, 0).
 for (i in seq_along(zipCode)) {

   tmp_rec <- melt(df_matrix_corr_per_zip[[i]])
   tmp_rec$zipCode <- zipCode[i]

   # First write to a new file creates it with a header; every later write
   # appends data rows only.
   write.table(tmp_rec, file = corr_file, sep = ",",
               append = !write_header, col.names = write_header)
   write_header <- FALSE
 }
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file

Excel file corr.csv analysis

Load interpretation

library(knitr)

# Read the weather correlation findings file and render it as a table.
weather_compType_zip <- read.csv(
  "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/WeatherCorrelationFinding.csv",
  header = TRUE,
  sep = ","
)
kable(weather_compType_zip)
Var1.Var2.value.zipCode.Boro.Neighbor.Analysis
TAVG Homeless Person Assistance Positive 10010 Manhattan Gramercy Park and Murray Hill outdoor homeless
TAVG Noise - Residential Positive 10010 Manhattan Gramercy Park and Murray Hill Parties create noise
TAVG General Construction/Plumbing Positive 10010 Manhattan Gramercy Park and Murray Hill high usage of water
TAVG Broken Muni Meter Positive 10010 Manhattan Gramercy Park and Murray Hill high usage of muni meter
TAVG Homeless Person Assistance Positive 10017 Manhattan Gramercy Park and Murray Hill outdoor homeless
TAVG Illegal Parking Positive 10017 Manhattan Gramercy Park and Murray Hill hard to find legal parking spot during summer time
TAVG Broken Muni Meter Positive 10017 Manhattan Gramercy Park and Murray Hill high usage of muni meter
TAVG Noise - Residential Positive 10017 Manhattan Gramercy Park and Murray Hill Parties create noise
TAVG HEAT/HOT WATER Negative 10032 Manhattan Inwood and Washington Heights need heat in cold temperture
TAVG DOOR/WINDOW Positive 10039 Manhattan Central Harlem buildings are lack of repair
TAVG PAINT - PLASTER Positive 10039 Manhattan Central Harlem buildings are lack of repair
TAVG HEAT/HOT WATER Positive 10039 Manhattan Central Harlem buildings are lack of repair
TAVG HEAT/HOT WATER Positive 10065 Manhattan Upper East Side buildings are lack of repair
TAVG Electronics Waste Negative 10308 Staten Island South Shore when temperturns is low, less frequent for garbage collection
TAVG Missed Collection (All Materials) Negative 10308 Staten Island South Shore when temperturns is low, less frequent for garbage collection
TAVG HEAT/HOT WATER Negative 10455 Bronx Hunts Point and Mott Haven need heat in cold temperture
TAVG HEAT/HOT WATER Negative 10460 Bronx Central Bronx need heat in cold temperture
TAVG HEAT/HOT WATER Negative 10466 Bronx Northeast Bronx need heat in cold temperture
TAVG Unsanitation Condition Negative 10474 Bronx Hunts Point and Mott Haven when temperturns is low, less frequent for garbage collection or remove snow
TAVG Blocked Driveway Negative 10474 Bronx Hunts Point and Mott Haven people don’t move their cars in snow day
TAVG Noise - Residential Positive 10474 Bronx Hunts Point and Mott Haven Parties create noise
TAVG Street Condition Positive 10474 Bronx Hunts Point and Mott Haven smelly or dirty in summer
TAVG Unsanitation Condition Positive 10474 Bronx Hunts Point and Mott Haven smelly or dirty in summer
TAVG General Construction/Plumbing Negative 11105 Queens Northwest Queens broken sewer or unfinished constructure
TAVG HEAT/HOT WATER Positive 11105 Queens Northwest Queens buildings are lack of repair
TAVG Street Light Condition Positive 11106 Queens Northwest Queens Outdoor pedestrian file complaint
TAVG Traffic Signal Condition Positive 11106 Queens Northwest Queens Outdoor pedestrian file complaint
TAVG HEAT/HOT WATER Negative 11224 Brooklyn Southern Brooklyn need heat in cold temperture
TAVG General Construction/Plumbing Positive 11224 Brooklyn Southern Brooklyn frequent new constructure
TAVG Water System Positive 11224 Brooklyn Southern Brooklyn frequent new constructure
TAVG Blocked Driveway Negative 11356 Queens North Queens people don’t move their cars in snow day
TAVG Street Condition Negative 11356 Queens North Queens when temperturns is low, less frequent for garbage collection or remove snow
TAVG Street Light Condition Negative 11356 Queens North Queens broken street light in cold condition or no light in the dark
TAVG Blocked Driveway Positive 11356 Queens North Queens parties create illegal parking or more people go out in summer time
TAVG Building/Use Positive 11356 Queens North Queens Neighbors discover more illegal construstions
TAVG Derelict Vehicles Positive 11356 Queens North Queens Neighbors discover more abandoned cars
TAVG General Construction/Plumbing Positive 11419 Queens Southwest Queens frequent new constructure

Correlation of Population and median household income by zip code

# Load the 2017 complaint-frequency / population / income table (long form).
pop_inc_zip2017 <- read.csv(
  "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/2017_ComplaintFreq_pop_inc.csv",
  header = TRUE,
  sep = ","
)

# Long -> wide: one column per distinct Type, filled from Value.
pop_inc_zip2017 <- pop_inc_zip2017 %>%
  spread(key = Type, value = Value)

# Save the wide table; the hypothesis test section reads this file back in.
write.csv(
  pop_inc_zip2017,
  file = "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/hypothsisTest_High_Inc.csv"
)

# Keep columns 2..last so the first column (presumably the zip code
# identifier -- confirm against the input file) stays out of the correlation.
pop_inc_zip2017 <- pop_inc_zip2017[, 2:ncol(pop_inc_zip2017)]

# Pairwise correlations between the remaining columns, using only rows that
# are complete across all of them.
cor_pop_inc_zip2017 <- cor(pop_inc_zip2017, use = "complete.obs")

library(corrplot)
# Upper-triangle correlogram with variables reordered by hierarchical clustering.
corrplot(cor_pop_inc_zip2017, type = "upper", order = "hclust")

Hypothesis test: Do higher-income zip codes have fewer complaints?

We have the median household income for 178 NYC zip codes and the frequency of 311 calls per zip code in 2017. We use a one-tailed z-test at the 95% confidence level to decide whether to reject the null hypothesis.

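Hypothesis test: Ho: the mean number of complaints in higher-than-average-income zip codes >= the mean number of complaints across all zip codes. Ha: the mean number of complaints in higher-than-average-income zip codes < the mean number of complaints across all zip codes.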

Conclusion: reject Ho (the p-value computed below, about 1.07e-09, is far below 0.05).

# Reload the wide population/income table written earlier in this document.
hypothysis_pop_inc <- read.csv(
  "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/hypothsisTest_High_Inc.csv",
  header = TRUE,
  sep = ","
)

# Keep only the columns the test needs, drop incomplete rows, then drop rows
# with AvgIncome equal to 0 or Population equal to 1.
hypothysis_pop_inc <- drop_na(
  select(hypothysis_pop_inc, Zipcode, AvgIncome, ComplaintFreq, Population)
)
hypothysis_pop_inc <- filter(
  hypothysis_pop_inc,
  AvgIncome != 0,
  Population != 1
)

# Means over all remaining zip codes.
averagePopulationIncome <- mean(hypothysis_pop_inc$AvgIncome)
averagePopulationComplaintFreq <- mean(hypothysis_pop_inc$ComplaintFreq)

# "High income" = zip codes whose AvgIncome is above the overall mean.
highIncome <- filter(hypothysis_pop_inc, AvgIncome > averagePopulationIncome)

# Sample statistics of complaint frequency within the high-income group.
averagehighIncomeComplaintFreq <- mean(highIncome$ComplaintFreq)
numHighIncome <- nrow(highIncome)
sdHighIncomeComplainFreq <- sd(highIncome$ComplaintFreq)

# z score: (group mean - overall mean) / (group sd / sqrt(group size)).
z_score_highIncomComplainFreq <-
  (averagehighIncomeComplaintFreq - averagePopulationComplaintFreq) /
  (sdHighIncomeComplainFreq / sqrt(numHighIncome))

# Lower-tail p-value for the z score.
pnorm(q = z_score_highIncomComplainFreq, lower.tail = TRUE)
## [1] 1.069003e-09

Linear regression

Average Temperature vs. Heat/Hot Water complaint frequency.

(daily record is an observation)

library(dplyr)

# Reload the per-day, per-zip-code frequency table.
raw_call311_2013to2017 <- read.csv(
  "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/CleanZipCodePerDayComplaintFreq.txt",
  header = TRUE,
  sep = ","
)

# x: HEAT/HOT WATER rows, with Freq summed per CreatedDate.
x <- summarise(
  group_by(
    filter(raw_call311_2013to2017, Type == "HEAT/HOT WATER"),
    CreatedDate, Type
  ),
  Freq = sum(Freq)
)

# y: TAVG rows for zip code 10001 (used as the example location), taken only
# from rows of the raw table that have no missing values.
y <- select(
  filter(
    drop_na(raw_call311_2013to2017),
    Type == "TAVG", Incident.Zip == "10001"
  ),
  CreatedDate, Type, Freq
)

# Round the TAVG values to whole numbers.
y[["Freq"]] <- round(y[["Freq"]])

# Stack both series as matrices, then go wide: one row per CreatedDate with a
# `HEAT/HOT WATER` column and a TAVG column.
xy <- rbind(as.matrix(x), as.matrix(y)) %>%
  data.frame() %>%
  spread(key = Type, value = Freq)

# Year-month key (e.g. "201301") parsed from the m/d/Y CreatedDate.
xy <- mutate(
  xy,
  yyyymm = format(as.Date(CreatedDate, "%m/%d/%Y"), "%Y%m")
)

# Keep only dates that have both a complaint count and a temperature.
xy <- drop_na(xy)

# The matrix round trip left both measures as text; convert them back to
# numbers before the monthly aggregation.
xy <- mutate(
  xy,
  `HEAT/HOT WATER` = as.numeric(as.character(`HEAT/HOT WATER`)),
  TAVG = as.numeric(as.character(TAVG))
)


# Per month: total HEAT/HOT WATER complaints and mean TAVG.
xy2 <- summarise(
  group_by(xy, yyyymm),
  Month_Sum = sum(`HEAT/HOT WATER`),
  Month_TAVG = mean(TAVG)
)

# Simple linear regression of monthly complaint total on monthly mean TAVG.
lm(formula = xy2$Month_Sum ~ xy2$Month_TAVG)
## 
## Call:
## lm(formula = xy2$Month_Sum ~ xy2$Month_TAVG)
## 
## Coefficients:
##    (Intercept)  xy2$Month_TAVG  
##        68728.2          -905.1
# Fitted model:
#   monthly HEAT/HOT WATER complaints = 68728.2 - 905.1 * Month_TAVG
# Example: at a monthly average temperature of 50 degrees the model gives
# about 23473 complaints.

# Predictive modeling - linear regression.
# Scatter plot of monthly complaint total (x axis) against monthly mean TAVG
# (y axis), with a lowess smoother drawn in blue.
plot(
  x = xy2$Month_Sum,
  y = xy2$Month_TAVG,
  main = "Monthly TAVG-Heat/Hot Water Complaint Frequency",
  xlab = "Monthly Complaint ",
  ylab = "Monthly Average Temperture (TAVG)"
)
lines(lowess(x = xy2$Month_Sum, y = xy2$Month_TAVG), col = "blue")