First, read in the data…
oldwd <- getwd()
setwd('MB_2016_QLD')
mesh <- readOGR('.','MB_2016_QLD')
mesh <- st_as_sf(mesh)
setwd(oldwd)
setwd('2016_crimes_ALL_GROUPS_Commerial_final')
crimes <- readOGR('.','2016_crimes_ALL_GROUPS_Commerial_final')
crimes %>% st_as_sf -> crimes
setwd(oldwd)Below is a bar plot showing the relative abundance of the crime types.
ggplot(crimes %>% mutate(Crime_grp=fct_infreq(Crime_grp)),aes(x=Crime_grp)) + geom_bar(fill='indianred')These are the locations for the Drug group:
crimes %>% filter(Crime_grp=='Drug') -> drug
tmap_mode('view')
tm_shape(drug) + tm_dots()Process to get some derived information
crimes %>%
filter(Crime_grp=='Drug') %>%
mutate(StartDate=ymd_hms(sub('T',' ',StartDate),tz='Australia/Brisbane'),
hour=hour(StartDate),
day=wday(StartDate) - 1,
whour=hour + day*24,
MB_CODE16=as.character(MB_CODE16)) -> crime_ss
crime_ss %>%
group_by(hour,MB_CODE16) %>%
summarise(count=n()) %>%
arrange(MB_CODE16,hour) -> crime_ssh
table(crime_ssh$MB_CODE16,crime_ssh$hour) -> occ_mat
colnames(occ_mat) <- paste0("T",0:23,"00")Then we ran a PAM cluster analysis on the data:
cl <- pam(occ_mat,4,metric='manhattan')Note
Next we link the clusters back to the mesh blocks and compute the probability of events occuring at each hour for each of the groups of clusters.
occ_mat %>% as_tibble %>% transmute(mb=Var1,time=Var2,n=n) %>% arrange(mb) -> occ_tab
tibble(mb=row.names(occ_mat),cl=cl$clustering) -> clusters
occ_tab %>% left_join(clusters) %>% group_by(time,cl) %>% summarise(prob=mean(n),N=n()) %>% ungroup %>%
mutate(cl=paste0('Clust',cl,' (',N,')'),time=as.numeric(sub('T','',time)))-> profilesfinally we visualise the results as ‘radar’ plots
ggplot(profiles,aes(x=time,y=prob)) + geom_area(stat='identity',fill='dodgerblue') +
facet_wrap(~cl) + coord_polar() +
scale_x_continuous(breaks=seq(0,1800,by=600),labels=c("Midnight","6am","Noon","6pm"))crime_ss %>%
group_by(whour,MB_CODE16) %>%
summarise(count=n()) %>%
arrange(MB_CODE16,whour) -> crime_ssw
table(crime_ssw$MB_CODE16,crime_ssw$whour) -> occ_mat
colnames(occ_mat) <- paste0("H",0:167)
cl <- pam(occ_mat,4,metric='manhattan')
occ_mat %>% as_tibble %>% transmute(mb=Var1,time=Var2,n=n) %>% arrange(mb) -> occ_tab
tibble(mb=row.names(occ_mat),cl=cl$clustering) -> clusters
occ_tab %>% left_join(clusters) %>% group_by(time,cl) %>% summarise(prob=mean(n),N=n()) %>% ungroup %>%
mutate(cl=paste0('Clust',cl,' (',N,')'),time=as.numeric(sub('H','',time)))-> profiles
ggplot(profiles,aes(x=time,y=prob)) + geom_line(stat='identity',col='dodgerblue') + facet_wrap(~cl) +
coord_polar() + scale_x_continuous(breaks=(0:6)*24,labels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat")) st_centroid(mesh) %>%
right_join(clusters %>% transmute(MB_CODE16=mb,cluster=paste0('Cluster ',cl))) -> comclus
tm_shape(comclus) + tm_dots(col='cluster')Firstly, cyclic splines are fitted. These have a roughness penalty, and satisfy \(f(x) = f(x+c)\), \(f'(x) = f'(x+c)\) and \(f''(x) = f''(x+c)\) where \(c=\) 24 \(\times\) 7 = 168 hours.
intense <- data.frame(time=NULL,intensity=NULL,se=NULL,cluster=NULL)
for (i in 1:4) occ_tab %>% left_join(clusters) %>% filter(cl==i) %>% mutate(time=as.numeric(sub("H","",time))) %>%
gam(n~s(time,bs='cc'),data=.,knots=list(time=c(0,168))) %>% predict(newdata=data.frame(time=0:168),se.fit=TRUE) %>%
data.frame(time=0:168,intensity=.$fit,se=.$se.fit,cluster=i) %>% rbind(intense) -> intenseNext, we view the splines:
ggplot(intense,aes(x=time,y=intensity)) + geom_area(alpha=0.7,fill='salmon') + facet_wrap(~cluster) +
coord_polar() +
scale_x_continuous(breaks=(0:6)*24,
labels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat")) Next, the same with error bands:
intense %>% mutate(hi=intensity+2*se,lo=intensity-2*se) %>% filter(time < 168) -> intense2
ggplot(intense2,aes(x=time,y=intensity,ymin=lo,ymax=hi)) + geom_errorbar(alpha=0.7,col='darkgreen') +
geom_point(col='wheat',size=0.05) +
facet_wrap(~cluster) + coord_polar() +
scale_x_continuous(breaks=(0:6)*24,limits=c(0,168),
labels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat")) Cross tabulate crime counts by mesh block. Note mesh blocks with no crimes are excluded here.
with(crimes,table(MB_CODE16,Crime_grp)) -> crime_profileMake profiles be proportions rather than absolutes:
sweep(crime_profile,1,rowSums(crime_profile),'/') -> crime_profileNow do the cluster analysis …
pam(crime_profile,4) -> profile_cluster
profile_cluster$medoid %>% cbind(Count=table(profile_cluster$clustering)) -> meds
knitr::kable(meds,digits=3)| Drug | Property Damage | Property Theft | Public Nuisance | Violent | Count | |
|---|---|---|---|---|---|---|
| 30563927900 | 0.461 | 0.091 | 0.266 | 0.097 | 0.084 | 225 |
| 30189892000 | 0.074 | 0.107 | 0.254 | 0.500 | 0.066 | 385 |
| 30276530000 | 0.111 | 0.133 | 0.533 | 0.156 | 0.067 | 641 |
| 30275470000 | 0.042 | 0.031 | 0.881 | 0.024 | 0.022 | 465 |
… and a map:
profile_cluster$clustering %>% data.frame(MB_CODE16=names(.),cluster=paste0("Cluster ",.)) %>% .[,-1] -> clustlut
st_centroid(mesh) %>% right_join(clustlut) -> clustpts
tm_shape(clustpts) + tm_dots(col='cluster')pam(ilr(crime_profile),4,metric='manhattan') -> profile_cluster_ilr
meds_ilr <- ilrInv(profile_cluster_ilr$medoids)
colnames(meds_ilr) <- colnames(crime_profile)
meds_ilr %>% cbind(Count=table(profile_cluster_ilr$clustering)) -> meds_ilr
knitr::kable(meds_ilr,digits=3)| Drug | Property Damage | Property Theft | Public Nuisance | Violent | Count | |
|---|---|---|---|---|---|---|
| 30612870000 | 0.200 | 0.200 | 0.200 | 0.200 | 0.200 | 655 |
| 30434654000 | 0.125 | 0.062 | 0.187 | 0.562 | 0.062 | 208 |
| 30315981000 | 0.127 | 0.127 | 0.380 | 0.183 | 0.183 | 485 |
| 30139231000 | 0.086 | 0.086 | 0.657 | 0.114 | 0.057 | 368 |
… and a map.
profile_cluster_ilr$clustering %>% data.frame(MB_CODE16=names(.),ilr_cluster=paste0("ilr Cluster ",.)) %>% .[,-1] -> clustlut_ilr
st_centroid(mesh) %>% right_join(clustlut_ilr) -> clustpts_ilr
tm_shape(clustpts_ilr) + tm_dots(col='ilr_cluster') clustlut %>% left_join(clustlut_ilr) %>% with(.,table(cluster,ilr_cluster)) %>% kable| ilr Cluster 1 | ilr Cluster 2 | ilr Cluster 3 | ilr Cluster 4 | |
|---|---|---|---|---|
| Cluster 1 | 196 | 9 | 13 | 7 |
| Cluster 2 | 149 | 188 | 44 | 4 |
| Cluster 3 | 177 | 11 | 281 | 172 |
| Cluster 4 | 133 | 0 | 147 | 185 |