raw<-read.csv("crime.csv", skip = 3, header = TRUE, na.strings = "")
sub<-raw[10:33, c(1:3,8)]
#fill group value
library('zoo')
sub[1]<-na.locf(sub[1])
#trim leading and trailing spaces; remove "," from numbers
trim <- function (x) gsub("^\\s+|\\s+$|,", "", x)
df <- data.frame(sapply(sub, trim))
#create id/pk for each group
dflong <- cbind(rep(1:8, each=3), df)
names(dflong) <- c("group.id","group","variable","violent.crime","property.crime")
head(dflong)
## group.id group variable violent.crime
## 1 1 1000000 and over (Group I subset) 2012 166007
## 2 1 1000000 and over (Group I subset) 2013 161252
## 3 1 1000000 and over (Group I subset) Percent change -2.9
## 4 2 500000 to 999999 (Group I subset) 2012 138439
## 5 2 500000 to 999999 (Group I subset) 2013 134216
## 6 2 500000 to 999999 (Group I subset) Percent change -3.1
## property.crime
## 1 783251
## 2 760143
## 3 -3.0
## 4 747008
## 5 733283
## 6 -1.8
#conversion between long and wide format
library(tidyr)
v<-cbind(dflong[1:4] %>%
#unite(key2, violent.crime, murder, sep = "_") %>%
spread(variable, violent.crime), rep("violent.crime",8))
names(v)[3:6]<-c("Yr12", "Yr13", "change", "crime")
m<-cbind(dflong[c(1:3,5)] %>%
spread(variable, property.crime), rep("property.crime",8))
names(m)[3:6]<-names(v)[3:6]
dfwide<-rbind(v,m)
#convert factor to numeric for analysis
number <- function(x) {as.numeric(levels(x))[x]}
dfwide[3:4] <- data.frame(sapply(dfwide[3:4],number))
head(dfwide)
## group.id group Yr12 Yr13 change
## 1 1 1000000 and over (Group I subset) 166007 161252 -2.9
## 2 2 500000 to 999999 (Group I subset) 138439 134216 -3.1
## 3 3 250000 to 499999 (Group I subset) 112439 107520 -4.4
## 4 4 GROUP II (100000 to 249999) 154144 145408 -5.7
## 5 5 GROUP III (50000 to 99999) 116180 110033 -5.3
## 6 6 GROUP IV (25000 to 49999) 85900 80441 -6.4
## crime
## 1 violent.crime
## 2 violent.crime
## 3 violent.crime
## 4 violent.crime
## 5 violent.crime
## 6 violent.crime
#read data and escape quotes
raw2 <- read.csv("care.csv", header = TRUE, quote = "\"", na.strings = "")
lst <- strsplit(as.character(raw2[,9]), "[^0-9]+")
#fill unequal length and create daily hour calculations
indx <- lengths(lst)
df <- as.data.frame(do.call(rbind,lapply(lst, `length<-`, max(indx))))
res <- as.data.frame(sapply(df[,2:3],number))
aide <- cbind(res, res[1]/res[2])
#bind data to original dataframe
colnames(aide) <- c("Aide_Hours_Week", "Aide_Days", "Aide_Hours_Day")
data <- cbind(raw2[,1:8], aide)
#prepare for numeric analysis
library("stringr")
num <- function(x) {as.numeric(str_replace(x,",",""))}
data$Auth_Units <- num(data$Auth_Units)
head(data)
## County Auth_FromDate Auth_ToDate Auth_Units Auth_Months Auth_Weeks
## 1 Bronx 05/01/2015 10/31/2015 243 6 30
## 2 Kings 05/01/2015 10/31/2015 321 6 30
## 3 Kings 08/01/2015 10/31/2015 558 3 15
## 4 Kings 05/01/2015 07/31/2015 1116 3 15
## 5 Kings 05/01/2015 07/31/2015 92 3 15
## 6 Queens 05/01/2015 07/31/2015 216 3 15
## Hrs_Per_Week Creation_Date Aide_Hours_Week Aide_Days Aide_Hours_Day
## 1 8 04/29/2015 9 3 3
## 2 10 05/04/2015 12 4 3
## 3 37 04/28/2015 42 7 6
## 4 74 04/28/2015 84 7 12
## 5 0 05/05/2015 1 NA NA
## 6 14 05/06/2015 16 4 4
## Warning: Removed 1608 rows containing non-finite values (stat_boxplot).
#read and cleanse
raw3<-read.csv("trade.csv", header = TRUE, skip = 1, na.strings = "")
dat<-raw3[rowSums(is.na(raw3))!=8, ]
dat[1]<-na.locf(dat[1])
dat[,3:8]<-data.frame(sapply(dat[3:8],num))
#calculate trade surplus
dat <- cbind(dat, dat[3]-dat[4], dat[5]-dat[6], dat[7]-dat[8])
ts <- dat[,c(1,2,9:11)]
names(ts) <- c("month","category","S13","S14","S15")
tsl <- ts %>% gather(year, surplus, S13:S15)
tsl
## month category year surplus
## 1 January Goods S13 -60517
## 2 January Services S13 18888
## 3 February Goods S13 -61647
## 4 February Services S13 18893
## 5 March Goods S13 -56233
## 6 March Services S13 18709
## 7 April Goods S13 -58039
## 8 April Services S13 18274
## 9 January Goods S14 -59968
## 10 January Services S14 20506
## 11 February Goods S14 -61292
## 12 February Services S14 18456
## 13 March Goods S14 -63050
## 14 March Services S14 19929
## 15 April Goods S14 -64321
## 16 April Services S14 20051
## 17 January Goods S15 -62950
## 18 January Services S15 19362
## 19 February Goods S15 -58041
## 20 February Services S15 19503
## 21 March Goods S15 -71164
## 22 March Services S15 19002
## 23 April Goods S15 -61591
## 24 April Services S15 19334