# Load required packages
library(XML)
library(RCurl)
## Loading required package: bitops
library(xlsx)
## Loading required package: rJava
##
## Attaching package: 'rJava'
## The following object is masked from 'package:RCurl':
##
## clone
## Loading required package: xlsxjars
library(rvest)
## Loading required package: xml2
##
## Attaching package: 'rvest'
## The following object is masked from 'package:XML':
##
## xml
library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:RCurl':
##
## complete
library(ggplot2)
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.4.3
## Loading required package: RColorBrewer
Download data from NYC government open data:
311 Service Requests from 2010 to Present: https://data.cityofnewyork.us/dataset/311-Service-Requests-From-2011/fpz8-jqf4
Median Household Income 2017 by zip code: http://zipatlas.com/us/ny/zip-code-comparison/median-household-income.htm
NYC Weather (search "New York, NY" as the area of interest): https://www.ncdc.noaa.gov/cdo-web/confirmation
Transformed data: CleanZipCodePerDayComplaintFreq.txt
# Goal: a table holding daily weather and complaint types by zip code.
# Step 1: read the prepared comma-separated inputs into R.
# The 311 file is used later through its Incident.Zip, CreatedDate, Type and
# Freq columns.
raw_call311_2013to2017 <- read.csv(
  "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/CleanZipCodePerDayComplaintFreq.txt",
  sep = ",",
  header = TRUE
)
raw_nyc_zipcode <- read.csv(
  "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/Zipcode_tidy.csv",
  sep = ",",
  header = TRUE
)

# Reference vector of New York zip codes, taken from the zip code table.
# It is only extracted here; it is applied as a filter later in the script.
zipCode <- raw_nyc_zipcode$Zip.Code
# Total number of complaints per complaint type over the whole data set.
# Rows with any missing value are discarded first. "TAVG" and "SNOW" are the
# weather rows stored in the same Type column (the per-zip analysis further
# down adds exactly these two back as weather), so both are excluded here:
# they are not complaint types and must not appear in the word cloud.
word_count <- raw_call311_2013to2017 %>%
  drop_na() %>%
  filter(!Type %in% c("TAVG", "SNOW")) %>%
  group_by(Type) %>%
  summarize(freq = sum(Freq))

# Fixed seed so the word layout is reproducible between runs.
set.seed(1234)

# Word cloud of complaint types sized by total frequency.
# `scale` is reduced from the package default c(4, 0.5): at the default size
# the most frequent, multi-word labels (e.g. "Noise - Residential",
# "HEAT/HOT WATER") could not be fit on the page and were dropped from the
# plot with a warning, which hides exactly the types that matter most.
wordcloud(
  words = word_count$Type,
  freq = word_count$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.1,
  scale = c(2, 0.25)
)
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Noise - Residential could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : HEAT/HOT WATER could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : General Construction/Plumbing could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : DOF Property - Property Value could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Beach/Pool/Sauna Complaint could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Home Delivered Meal - Missed Delivery could not be fit on
## page. It will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Unsanitary Animal Facility could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Advocate-Personal Exemptions could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Municipal Parking Facility could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Home Delivered Meal Complaint could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Special Natural Area District (SNAD) could not be fit on
## page. It will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Animal Facility - No Permit could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Illegal Animal Sold could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : DOF Property - City Rebate could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Overflowing Recycling Baskets could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Home Care Provider Complaint could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Case Management Agency Complaint could not be fit on page.
## It will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Bereavement Support Group could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : DHS Advantage - Tenant could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Sprinkler - Mechanical could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Rangehood could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Transportation Provider Complaint could not be fit on page.
## It will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Calorie Labeling could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Legal Services Provider Complaint could not be fit on page.
## It will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : X-Ray Machine/Equipment could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : DHS Advantage -Landlord/Broker could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Highway Sign - Damaged could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Advocate-Co-opCondo Abatement could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : OEM Disabled Vehicle could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Hazmat Storage/Use could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Highway Sign - Missing could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Advocate-Prop Refunds/Credits could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Radioactive Material could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = word_count$Type, freq = word_count$freq,
## min.freq = 1, : Cooling Tower could not be fit on page. It will not be
## plotted.
# Load the combined long-form table (comma-separated, with a header row).
# Only its Type and SubTotal columns are used in this step.
top20types <- read.csv(
  "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/combine.txt",
  sep = ",",
  header = TRUE
)

# Grand total per type: keep the two relevant columns, discard incomplete
# rows, sum SubTotal within each Type, order from largest to smallest and
# keep the 20 largest totals (ties at the cut-off are all kept by top_n).
top20Total <- top20types %>%
  select(Type, SubTotal) %>%
  drop_na() %>%
  group_by(Type) %>%
  summarize(Total = sum(SubTotal)) %>%
  arrange(desc(Total)) %>%
  top_n(20, Total)
# Bar chart of the total frequency of each of the top complaint types.
# Aesthetics refer to the columns of `top20Total` by bare name instead of
# `top20Total$...`, so ggplot2 evaluates them against the plot data. The
# former `color` mapping is dropped: geom_bar() sets a constant outline
# colour, which overrode the mapping anyway.
ggplot(top20Total, aes(x = Type, y = Total)) +
  geom_bar(stat = "identity", fill = "lightblue", color = "lightblue") +
  xlab("Complaint Types") +
  ylab("Frequency") +
  # Rotate the type labels so the long names do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 0.2, vjust = 0.2))
# Transform from long form to wide form: one column per Type, filled with the
# matching SubTotal values.
wide_top20types <- top20types %>%
  spread(key = Type, value = SubTotal)

# Keep only columns 2 to 23 (the per-type columns), dropping the leading
# identifier column.
wide_top20types <- wide_top20types[, 2:23]

# Pairwise correlation matrix between the type columns, computed on the rows
# that have no missing value in any column, shown rounded to two decimals.
res <- cor(x = wide_top20types, use = "complete.obs")
round(res, digits = 2)
## Blocked Driveway Damaged Tree
## Blocked Driveway 1.00 -0.27
## Damaged Tree -0.27 1.00
## Derelict Vehicles -0.23 0.43
## Dirty Conditions -0.25 0.33
## DOF Literature Request 0.08 0.07
## ELECTRIC 0.00 0.33
## General Construction/Plumbing 0.03 0.33
## HEAT/HOT WATER 0.32 -0.37
## Illegal Parking 0.35 0.24
## Noise - Commercial 0.09 -0.39
## Noise - Residential 0.05 -0.39
## Noise - Street/Sidewalk -0.32 0.08
## NONCONST -0.24 0.52
## PAINT - PLASTER 0.05 0.34
## Sewer 0.02 0.34
## SNOW 0.24 -0.01
## Street Condition 0.37 0.01
## Street Light Condition -0.10 0.36
## TAVG -0.55 0.54
## Traffic Signal Condition 0.26 0.16
## Unsanitation Condition -0.46 0.56
## Water System -0.12 0.20
## Derelict Vehicles Dirty Conditions
## Blocked Driveway -0.23 -0.25
## Damaged Tree 0.43 0.33
## Derelict Vehicles 1.00 0.54
## Dirty Conditions 0.54 1.00
## DOF Literature Request 0.31 0.31
## ELECTRIC 0.56 0.41
## General Construction/Plumbing 0.60 0.40
## HEAT/HOT WATER -0.23 -0.17
## Illegal Parking 0.50 0.22
## Noise - Commercial -0.56 -0.42
## Noise - Residential -0.63 -0.46
## Noise - Street/Sidewalk -0.20 -0.13
## NONCONST 0.71 0.51
## PAINT - PLASTER 0.58 0.39
## Sewer 0.16 0.09
## SNOW -0.21 -0.16
## Street Condition 0.45 0.25
## Street Light Condition 0.67 0.49
## TAVG 0.23 0.22
## Traffic Signal Condition 0.16 0.14
## Unsanitation Condition 0.73 0.56
## Water System 0.21 0.21
## DOF Literature Request ELECTRIC
## Blocked Driveway 0.08 0.00
## Damaged Tree 0.07 0.33
## Derelict Vehicles 0.31 0.56
## Dirty Conditions 0.31 0.41
## DOF Literature Request 1.00 0.47
## ELECTRIC 0.47 1.00
## General Construction/Plumbing 0.56 0.88
## HEAT/HOT WATER 0.28 0.23
## Illegal Parking 0.18 0.40
## Noise - Commercial -0.46 -0.77
## Noise - Residential -0.55 -0.83
## Noise - Street/Sidewalk -0.39 -0.51
## NONCONST 0.46 0.77
## PAINT - PLASTER 0.53 0.87
## Sewer 0.04 0.30
## SNOW 0.05 0.14
## Street Condition 0.11 0.40
## Street Light Condition 0.59 0.70
## TAVG -0.23 -0.07
## Traffic Signal Condition 0.51 0.61
## Unsanitation Condition 0.18 0.43
## Water System -0.03 0.41
## General Construction/Plumbing HEAT/HOT WATER
## Blocked Driveway 0.03 0.32
## Damaged Tree 0.33 -0.37
## Derelict Vehicles 0.60 -0.23
## Dirty Conditions 0.40 -0.17
## DOF Literature Request 0.56 0.28
## ELECTRIC 0.88 0.23
## General Construction/Plumbing 1.00 0.32
## HEAT/HOT WATER 0.32 1.00
## Illegal Parking 0.42 -0.15
## Noise - Commercial -0.76 -0.12
## Noise - Residential -0.85 -0.19
## Noise - Street/Sidewalk -0.57 -0.53
## NONCONST 0.79 -0.09
## PAINT - PLASTER 0.96 0.26
## Sewer 0.37 -0.06
## SNOW 0.09 0.24
## Street Condition 0.49 0.14
## Street Light Condition 0.76 0.06
## TAVG -0.20 -0.80
## Traffic Signal Condition 0.69 0.51
## Unsanitation Condition 0.41 -0.47
## Water System 0.24 -0.04
## Illegal Parking Noise - Commercial
## Blocked Driveway 0.35 0.09
## Damaged Tree 0.24 -0.39
## Derelict Vehicles 0.50 -0.56
## Dirty Conditions 0.22 -0.42
## DOF Literature Request 0.18 -0.46
## ELECTRIC 0.40 -0.77
## General Construction/Plumbing 0.42 -0.76
## HEAT/HOT WATER -0.15 -0.12
## Illegal Parking 1.00 -0.30
## Noise - Commercial -0.30 1.00
## Noise - Residential -0.33 0.82
## Noise - Street/Sidewalk -0.10 0.45
## NONCONST 0.38 -0.74
## PAINT - PLASTER 0.42 -0.75
## Sewer 0.19 -0.29
## SNOW -0.06 -0.11
## Street Condition 0.52 -0.35
## Street Light Condition 0.41 -0.64
## TAVG 0.07 -0.03
## Traffic Signal Condition 0.24 -0.53
## Unsanitation Condition 0.33 -0.47
## Water System 0.17 -0.33
## Noise - Residential Noise - Street/Sidewalk
## Blocked Driveway 0.05 -0.32
## Damaged Tree -0.39 0.08
## Derelict Vehicles -0.63 -0.20
## Dirty Conditions -0.46 -0.13
## DOF Literature Request -0.55 -0.39
## ELECTRIC -0.83 -0.51
## General Construction/Plumbing -0.85 -0.57
## HEAT/HOT WATER -0.19 -0.53
## Illegal Parking -0.33 -0.10
## Noise - Commercial 0.82 0.45
## Noise - Residential 1.00 0.65
## Noise - Street/Sidewalk 0.65 1.00
## NONCONST -0.80 -0.28
## PAINT - PLASTER -0.83 -0.54
## Sewer -0.31 -0.23
## SNOW -0.07 -0.17
## Street Condition -0.35 -0.40
## Street Light Condition -0.78 -0.41
## TAVG 0.08 0.64
## Traffic Signal Condition -0.57 -0.52
## Unsanitation Condition -0.49 0.10
## Water System -0.30 -0.04
## NONCONST PAINT - PLASTER Sewer SNOW
## Blocked Driveway -0.24 0.05 0.02 0.24
## Damaged Tree 0.52 0.34 0.34 -0.01
## Derelict Vehicles 0.71 0.58 0.16 -0.21
## Dirty Conditions 0.51 0.39 0.09 -0.16
## DOF Literature Request 0.46 0.53 0.04 0.05
## ELECTRIC 0.77 0.87 0.30 0.14
## General Construction/Plumbing 0.79 0.96 0.37 0.09
## HEAT/HOT WATER -0.09 0.26 -0.06 0.24
## Illegal Parking 0.38 0.42 0.19 -0.06
## Noise - Commercial -0.74 -0.75 -0.29 -0.11
## Noise - Residential -0.80 -0.83 -0.31 -0.07
## Noise - Street/Sidewalk -0.28 -0.54 -0.23 -0.17
## NONCONST 1.00 0.79 0.23 -0.05
## PAINT - PLASTER 0.79 1.00 0.36 0.12
## Sewer 0.23 0.36 1.00 0.15
## SNOW -0.05 0.12 0.15 1.00
## Street Condition 0.25 0.48 0.21 -0.03
## Street Light Condition 0.71 0.73 0.25 -0.03
## TAVG 0.27 -0.17 0.03 -0.25
## Traffic Signal Condition 0.37 0.65 0.43 0.30
## Unsanitation Condition 0.73 0.40 0.08 -0.26
## Water System 0.28 0.22 0.00 -0.05
## Street Condition Street Light Condition
## Blocked Driveway 0.37 -0.10
## Damaged Tree 0.01 0.36
## Derelict Vehicles 0.45 0.67
## Dirty Conditions 0.25 0.49
## DOF Literature Request 0.11 0.59
## ELECTRIC 0.40 0.70
## General Construction/Plumbing 0.49 0.76
## HEAT/HOT WATER 0.14 0.06
## Illegal Parking 0.52 0.41
## Noise - Commercial -0.35 -0.64
## Noise - Residential -0.35 -0.78
## Noise - Street/Sidewalk -0.40 -0.41
## NONCONST 0.25 0.71
## PAINT - PLASTER 0.48 0.73
## Sewer 0.21 0.25
## SNOW -0.03 -0.03
## Street Condition 1.00 0.37
## Street Light Condition 0.37 1.00
## TAVG -0.32 -0.02
## Traffic Signal Condition 0.34 0.50
## Unsanitation Condition 0.09 0.50
## Water System 0.14 0.14
## TAVG Traffic Signal Condition
## Blocked Driveway -0.55 0.26
## Damaged Tree 0.54 0.16
## Derelict Vehicles 0.23 0.16
## Dirty Conditions 0.22 0.14
## DOF Literature Request -0.23 0.51
## ELECTRIC -0.07 0.61
## General Construction/Plumbing -0.20 0.69
## HEAT/HOT WATER -0.80 0.51
## Illegal Parking 0.07 0.24
## Noise - Commercial -0.03 -0.53
## Noise - Residential 0.08 -0.57
## Noise - Street/Sidewalk 0.64 -0.52
## NONCONST 0.27 0.37
## PAINT - PLASTER -0.17 0.65
## Sewer 0.03 0.43
## SNOW -0.25 0.30
## Street Condition -0.32 0.34
## Street Light Condition -0.02 0.50
## TAVG 1.00 -0.39
## Traffic Signal Condition -0.39 1.00
## Unsanitation Condition 0.61 -0.05
## Water System 0.27 0.11
## Unsanitation Condition Water System
## Blocked Driveway -0.46 -0.12
## Damaged Tree 0.56 0.20
## Derelict Vehicles 0.73 0.21
## Dirty Conditions 0.56 0.21
## DOF Literature Request 0.18 -0.03
## ELECTRIC 0.43 0.41
## General Construction/Plumbing 0.41 0.24
## HEAT/HOT WATER -0.47 -0.04
## Illegal Parking 0.33 0.17
## Noise - Commercial -0.47 -0.33
## Noise - Residential -0.49 -0.30
## Noise - Street/Sidewalk 0.10 -0.04
## NONCONST 0.73 0.28
## PAINT - PLASTER 0.40 0.22
## Sewer 0.08 0.00
## SNOW -0.26 -0.05
## Street Condition 0.09 0.14
## Street Light Condition 0.50 0.14
## TAVG 0.61 0.27
## Traffic Signal Condition -0.05 0.11
## Unsanitation Condition 1.00 0.27
## Water System 0.27 1.00
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.4.3
## corrplot 0.84 loaded
# Correlogram of the correlation matrix: upper triangle only, with the
# variables reordered by hierarchical clustering.
corrplot(corr = res, type = "upper", order = "hclust")
#Check data
#save correlation matrix to long form
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Long-form copy of the correlation matrix (one row per pair of variables).
long_res <- reshape2::melt(res)
# List the wide table's column names, for picking columns afterwards.
colnames(wide_top20types)
## [1] "Blocked Driveway" "Damaged Tree"
## [3] "Derelict Vehicles" "Dirty Conditions"
## [5] "DOF Literature Request" "ELECTRIC"
## [7] "General Construction/Plumbing" "HEAT/HOT WATER"
## [9] "Illegal Parking" "Noise - Commercial"
## [11] "Noise - Residential" "Noise - Street/Sidewalk"
## [13] "NONCONST" "PAINT - PLASTER"
## [15] "Sewer" "SNOW"
## [17] "Street Condition" "Street Light Condition"
## [19] "TAVG" "Traffic Signal Condition"
## [21] "Unsanitation Condition" "Water System"
# Extract the "Street Condition" / "Derelict Vehicles" pair for a closer look.
# The columns are selected by name rather than by position (formerly 17 and
# 3), so the extract cannot silently switch to other complaint types if the
# set or order of columns in the wide table changes.
x <- wide_top20types[, c("Street Condition", "Derelict Vehicles")]
# Keep only the rows where both values are present.
a <- x[complete.cases(x), ]
names(a)
## [1] "Street Condition" "Derelict Vehicles"
summary(a)
## Street Condition Derelict Vehicles
## Min. : 15.0 Min. : 4.0
## 1st Qu.: 149.0 1st Qu.: 58.0
## Median : 238.0 Median : 85.0
## Mean : 263.6 Mean :101.9
## 3rd Qu.: 331.0 3rd Qu.:135.0
## Max. :1387.0 Max. :317.0
# Save the pair to the working directory for use outside R.
write.csv(a, "generalCorrelationCompType.csv")
library(dplyr)
# Per-zip-code correlation analysis.
# For every remaining zip code: total the daily frequency per type, keep the
# 10 types with the largest totals plus the weather rows ("TAVG", "SNOW"),
# reshape to one column per type and one row per day, and correlate the
# columns. Result: `df_matrix_corr_per_zip`, a list whose i-th element is the
# correlation matrix of `zipCode[i]` (code that reads the list by index
# relies on this alignment).

#remove zip codes which do not have good estimation of top 10 correlations
remove <- c(
  10471, 10475, 10464, 11239, 10020, 10004, 10005, 10006, 10007, 10280,
  10044, 11363, 11359, 11360, 11104, 11693, 11694, 11695, 11697, 11004,
  11005, 11411, 11426, 11427, 11428, 10307, 10314
)
zipCode <- zipCode[!zipCode %in% remove]

# Preallocate one slot per zip code instead of growing the list in the loop.
df_matrix_corr_per_zip <- vector("list", length(zipCode))

# Column selection and NA removal do not depend on the zip code, so they are
# done once here rather than on the full table in every iteration.
calls_complete <- raw_call311_2013to2017 %>%
  select(Incident.Zip, CreatedDate, Type, Freq) %>%
  drop_na()

# seq_along() is safe when no zip code is left (1:length() would yield 1, 0).
for (i in seq_along(zipCode)) {
  # Rows of this zip code, with duplicated (day, type) entries collapsed into
  # a single summed frequency. Ungrouped so later steps start from a plain
  # table.
  data_by_zip <- calls_complete %>%
    filter(Incident.Zip == zipCode[i]) %>%
    group_by(Incident.Zip, CreatedDate, Type) %>%
    summarize(Freq = sum(Freq)) %>%
    ungroup()

  # The 10 types with the largest total frequency in this zip code.
  # NOTE(review): the weather rows ("TAVG", "SNOW") are part of this ranking
  # and can take one of the 10 slots - confirm whether they should be
  # excluded before ranking.
  data_by_zip_total <- data_by_zip %>%
    group_by(Type) %>%
    summarize(Total = sum(Freq)) %>%
    top_n(n = 10, Total)

  # Keep the top types and always add the weather rows back.
  Comptype <- as.character(data_by_zip_total$Type)

  # Long form -> wide form (one column per type), keep only the days on which
  # every selected type has a value, then drop the two identifier columns by
  # name so that only the numeric type columns remain.
  data_by_zip <- data_by_zip %>%
    filter(Type %in% c(Comptype, "TAVG", "SNOW")) %>%
    spread(Type, Freq) %>%
    drop_na() %>%
    select(-Incident.Zip, -CreatedDate)

  # Correlation matrix for this zip code. A column that is constant over the
  # remaining days has zero standard deviation; cor() then warns and returns
  # NA for that column.
  cor_data_by_zip <- cor(data_by_zip, use = "complete.obs")

  #save correlation matrix by zip code to a list
  df_matrix_corr_per_zip[[i]] <- cor_data_by_zip
}
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(data_by_zip, use = "complete.obs"): the standard deviation
## is zero
# Output the correlation matrices by zip code for interpretation.
# Each matrix is melted to long form (one row per pair of variables) and
# tagged with its zip code; the pieces are then stacked and written to
# "corr.csv" in a single call. Writing once avoids what the former
# per-zip-code `append = TRUE` writes produced: the header line repeated in
# the middle of the file for every zip code, and results piling up in the
# same file every time the script was re-run. Row names are omitted so the
# header matches the data columns.
corr_long_per_zip <- lapply(seq_along(zipCode), function(i) {
  tmp_rec <- melt(df_matrix_corr_per_zip[[i]])
  tmp_rec$zipCode <- zipCode[i]
  tmp_rec
})

# Nothing to write when no zip code is left after filtering.
if (length(corr_long_per_zip) > 0) {
  write.table(
    do.call(rbind, corr_long_per_zip),
    file = "corr.csv",
    sep = ",",
    row.names = FALSE
  )
}
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
## Warning in write.table(tmp_rec, file = "corr.csv", sep = ",", append =
## TRUE): appending column names to file
Load interpretation
# Load the weather / complaint-type correlation findings (one row per
# finding, including zip code, borough, neighborhood and a short analysis
# note) and render them as a table in the report.
library(knitr)
weather_compType_zip <- read.csv(
  file = "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/WeatherCorrelationFinding.csv",
  header = TRUE,
  sep = ","
)
kable(weather_compType_zip)
| Var1.Var2.value.zipCode.Boro.Neighbor.Analysis |
|---|
| TAVG Homeless Person Assistance Positive 10010 Manhattan Gramercy Park and Murray Hill outdoor homeless |
| TAVG Noise - Residential Positive 10010 Manhattan Gramercy Park and Murray Hill Parties create noise |
| TAVG General Construction/Plumbing Positive 10010 Manhattan Gramercy Park and Murray Hill high usage of water |
| TAVG Broken Muni Meter Positive 10010 Manhattan Gramercy Park and Murray Hill high usage of muni meter |
| TAVG Homeless Person Assistance Positive 10017 Manhattan Gramercy Park and Murray Hill outdoor homeless |
| TAVG Illegal Parking Positive 10017 Manhattan Gramercy Park and Murray Hill hard to find legal parking spot during summer time |
| TAVG Broken Muni Meter Positive 10017 Manhattan Gramercy Park and Murray Hill high usage of muni meter |
| TAVG Noise - Residential Positive 10017 Manhattan Gramercy Park and Murray Hill Parties create noise |
| TAVG HEAT/HOT WATER Negative 10032 Manhattan Inwood and Washington Heights need heat in cold temperture |
| TAVG DOOR/WINDOW Positive 10039 Manhattan Central Harlem buildings are lack of repair |
| TAVG PAINT - PLASTER Positive 10039 Manhattan Central Harlem buildings are lack of repair |
| TAVG HEAT/HOT WATER Positive 10039 Manhattan Central Harlem buildings are lack of repair |
| TAVG HEAT/HOT WATER Positive 10065 Manhattan Upper East Side buildings are lack of repair |
| TAVG Electronics Waste Negative 10308 Staten Island South Shore when temperturns is low, less frequent for garbage collection |
| TAVG Missed Collection (All Materials) Negative 10308 Staten Island South Shore when temperturns is low, less frequent for garbage collection |
| TAVG HEAT/HOT WATER Negative 10455 Bronx Hunts Point and Mott Haven need heat in cold temperture |
| TAVG HEAT/HOT WATER Negative 10460 Bronx Central Bronx need heat in cold temperture |
| TAVG HEAT/HOT WATER Negative 10466 Bronx Northeast Bronx need heat in cold temperture |
| TAVG Unsanitation Condition Negative 10474 Bronx Hunts Point and Mott Haven when temperturns is low, less frequent for garbage collection or remove snow |
| TAVG Blocked Driveway Negative 10474 Bronx Hunts Point and Mott Haven people don’t move their cars in snow day |
| TAVG Noise - Residential Positive 10474 Bronx Hunts Point and Mott Haven Parties create noise |
| TAVG Street Condition Positive 10474 Bronx Hunts Point and Mott Haven smelly or dirty in summer |
| TAVG Unsanitation Condition Positive 10474 Bronx Hunts Point and Mott Haven smelly or dirty in summer |
| TAVG General Construction/Plumbing Negative 11105 Queens Northwest Queens broken sewer or unfinished constructure |
| TAVG HEAT/HOT WATER Positive 11105 Queens Northwest Queens buildings are lack of repair |
| TAVG Street Light Condition Positive 11106 Queens Northwest Queens Outdoor pedestrian file complaint |
| TAVG Traffic Signal Condition Positive 11106 Queens Northwest Queens Outdoor pedestrian file complaint |
| TAVG HEAT/HOT WATER Negative 11224 Brooklyn Southern Brooklyn need heat in cold temperture |
| TAVG General Construction/Plumbing Positive 11224 Brooklyn Southern Brooklyn frequent new constructure |
| TAVG Water System Positive 11224 Brooklyn Southern Brooklyn frequent new constructure |
| TAVG Blocked Driveway Negative 11356 Queens North Queens people don’t move their cars in snow day |
| TAVG Street Condition Negative 11356 Queens North Queens when temperturns is low, less frequent for garbage collection or remove snow |
| TAVG Street Light Condition Negative 11356 Queens North Queens broken street light in cold condition or no light in the dark |
| TAVG Blocked Driveway Positive 11356 Queens North Queens parties create illegal parking or more people go out in summer time |
| TAVG Building/Use Positive 11356 Queens North Queens Neighbors discover more illegal construstions |
| TAVG Derelict Vehicles Positive 11356 Queens North Queens Neighbors discover more abandoned cars |
| TAVG General Construction/Plumbing Positive 11419 Queens Southwest Queens frequent new constructure |
# Read the 2017 per-zip-code data set, which arrives in long form
# (a `Type` column naming the measure and a `Value` column holding it).
pop_inc_zip2017 <- read.csv(
  file = "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/2017_ComplaintFreq_pop_inc.csv",
  header = TRUE,
  sep = ","
)

# Long -> wide: one column per distinct `Type`, filled from `Value`.
pop_inc_zip2017 <- pop_inc_zip2017 %>%
  spread(Type, Value)

# Persist the wide table; the same path is read again later in this script.
write.csv(
  pop_inc_zip2017,
  "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/hypothsisTest_High_Inc.csv"
)

# Keep columns 2..last so the first column does not enter the correlation
# matrix (presumably the zip-code identifier -- TODO confirm).
pop_inc_zip2017 <- pop_inc_zip2017[, 2:ncol(pop_inc_zip2017)]

# Pairwise correlations across zip codes, using only rows with no missing
# values in any column.
cor_pop_inc_zip2017 <- cor(pop_inc_zip2017, use = "complete.obs")

# Upper-triangle correlogram, variables reordered by hierarchical clustering.
library(corrplot)
corrplot(cor_pop_inc_zip2017, type = "upper", order = "hclust")
We have the median household income for 178 NYC zip codes and the frequency of 311 calls per zip code in 2017. We use a one-tailed z-test at the 95% confidence level to decide whether to reject the null hypothesis.
Hypothesis test: Ho: mean # of complaints in zip codes with higher-than-average income >= mean # of complaints across all zip codes; Ha: mean # of complaints in zip codes with higher-than-average income < mean # of complaints across all zip codes
Conclusion: reject Ho
# One-tailed z-test: do zip codes with above-average income file fewer
# 311 complaints than the average zip code?

# Load the wide per-zip-code table and keep only complete, usable rows.
# NOTE(review): the filter drops Population == 1 but AvgIncome == 0; confirm
# that `Population != 1` (rather than `!= 0`) is the intended sentinel.
hypothysis_pop_inc <- read.csv(
  file = "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/hypothsisTest_High_Inc.csv",
  header = TRUE,
  sep = ","
) %>%
  select(Zipcode, AvgIncome, ComplaintFreq, Population) %>%
  drop_na() %>%
  filter(AvgIncome != 0, Population != 1)

# Reference values over all retained zip codes.
averagePopulationIncome <- mean(hypothysis_pop_inc[["AvgIncome"]])
averagePopulationComplaintFreq <- mean(hypothysis_pop_inc[["ComplaintFreq"]])

# "High income" group: zip codes whose income exceeds the overall mean.
highIncome <- filter(hypothysis_pop_inc, AvgIncome > averagePopulationIncome)

# Sample statistics of complaint frequency within the high-income group.
averagehighIncomeComplaintFreq <- mean(highIncome[["ComplaintFreq"]])
numHighIncome <- nrow(highIncome)
sdHighIncomeComplainFreq <- sd(highIncome[["ComplaintFreq"]])

# z score of the high-income group's mean complaint frequency against the
# overall mean; the standard error uses the group's own sample sd.
z_score_highIncomComplainFreq <-
  (averagehighIncomeComplaintFreq - averagePopulationComplaintFreq) /
  (sdHighIncomeComplainFreq / sqrt(numHighIncome))

# Lower-tail p-value for that z score.
pnorm(z_score_highIncomComplainFreq, lower.tail = TRUE)
## [1] 1.069003e-09
(daily record is an observation)
# Build a monthly data set of HEAT/HOT WATER complaint totals and mean
# temperature, then fit a simple linear regression between the two.
# Columns used from the input: CreatedDate, Incident.Zip, Type, Freq.
raw_call311_2013to2017 <- read.csv(
  file = "D:/CUNY_SPS_DA/607_Data_Aq/607 Final Project/goodData/CleanZipCodePerDayComplaintFreq.txt",
  header = TRUE,
  sep = ","
)
library(dplyr)

# Daily HEAT/HOT WATER complaint count, summed over every zip code.
x <- raw_call311_2013to2017 %>%
  filter(Type == "HEAT/HOT WATER") %>%
  group_by(CreatedDate, Type) %>%
  summarize(Freq = sum(Freq))

# Daily average temperature (TAVG rows), using zip code 10001 as the
# example location, rounded to whole units.
y <- raw_call311_2013to2017 %>%
  drop_na() %>%
  filter(Type == "TAVG", Incident.Zip == "10001") %>%
  select(CreatedDate, Type, Freq) %>%
  mutate(Freq = round(Freq, 0))

# Stack both series (as character matrices so the rows can be bound), then
# spread `Type` into one column per series, keyed by CreatedDate.
xy <- rbind(as.matrix(x), as.matrix(y)) %>%
  data.frame() %>%
  spread(Type, Freq)

# Tag each day with its year-month, drop days missing either series, and
# turn the two value columns back into numbers (they became text/factor in
# the matrix round-trip above).
xy <- xy %>%
  mutate(yyyymm = format(as.Date(CreatedDate, "%m/%d/%Y"), "%Y%m")) %>%
  drop_na() %>%
  mutate(
    `HEAT/HOT WATER` = as.numeric(as.character(`HEAT/HOT WATER`)),
    TAVG = as.numeric(as.character(TAVG))
  )

# Per month: total HEAT/HOT WATER complaints and mean daily temperature.
xy2 <- xy %>%
  group_by(yyyymm) %>%
  summarize(
    Month_Sum = sum(`HEAT/HOT WATER`),
    Month_TAVG = mean(TAVG)
  )

# Regress monthly complaint total on monthly mean temperature.
lm(xy2$Month_Sum ~ xy2$Month_TAVG)
##
## Call:
## lm(formula = xy2$Month_Sum ~ xy2$Month_TAVG)
##
## Coefficients:
## (Intercept) xy2$Month_TAVG
## 68728.2 -905.1
# Monthly Heat/Hot Water complaints = 68728.2 - 905.1 * Month_TAVG
# For example, the predicted number of monthly Heat/Hot Water complaints is about 23473 when the monthly average temperature is 50 degrees.
# Predictive modeling - visual check of the monthly relationship.
# Scatter plot with monthly complaint totals on the x axis and monthly mean
# temperature on the y axis, overlaid with a LOWESS smoother in blue.
plot(
  x = xy2$Month_Sum,
  y = xy2$Month_TAVG,
  main = "Monthly TAVG-Heat/Hot Water Complaint Frequency",
  xlab = "Monthly Complaint ",
  ylab = "Monthly Average Temperture (TAVG)"
)
# lowess(x, y) returns the smoothed curve; draw it over the scatter plot.
lines(lowess(xy2$Month_Sum, xy2$Month_TAVG), col = "blue")