Getting the information we need

First, read in the data…

oldwd <- getwd()
setwd('MB_2016_QLD')
mesh <- readOGR('.','MB_2016_QLD')
mesh <- st_as_sf(mesh)
setwd(oldwd)

setwd('2016_crimes_ALL_GROUPS_Commerial_final')
crimes <- readOGR('.','2016_crimes_ALL_GROUPS_Commerial_final')
crimes %>% st_as_sf  -> crimes
setwd(oldwd)

Crime Groups

Below is a bar plot showing the relative abundance of the crime types.

ggplot(crimes %>% mutate(Crime_grp=fct_infreq(Crime_grp)),aes(x=Crime_grp)) + geom_bar(fill='indianred')

Geography

These are the locations for the Drug group:

crimes %>% filter(Crime_grp=='Drug') -> drug 
tmap_mode('view')
tm_shape(drug) + tm_dots()

Analysis

Process to get some derived information

crimes %>% 
  filter(Crime_grp=='Drug') %>% 
  mutate(StartDate=ymd_hms(sub('T',' ',StartDate),tz='Australia/Brisbane'),
         hour=hour(StartDate),
         day=wday(StartDate) - 1,
         whour=hour + day*24,
         MB_CODE16=as.character(MB_CODE16)) -> crime_ss 

crime_ss %>%  
  group_by(hour,MB_CODE16) %>% 
  summarise(count=n()) %>% 
  arrange(MB_CODE16,hour) -> crime_ssh

table(crime_ssh$MB_CODE16,crime_ssh$hour) -> occ_mat
colnames(occ_mat) <- paste0("T",0:23,"00")

Cluster Analysis

Then we ran a PAM cluster analysis on the data:

cl <- pam(occ_mat,4,metric='manhattan')

Note

Results

Next we link the clusters back to the mesh blocks and compute the probability of events occuring at each hour for each of the groups of clusters.

occ_mat %>% as_tibble %>% transmute(mb=Var1,time=Var2,n=n) %>% arrange(mb) -> occ_tab
tibble(mb=row.names(occ_mat),cl=cl$clustering) -> clusters
occ_tab %>% left_join(clusters) %>% group_by(time,cl) %>% summarise(prob=mean(n),N=n()) %>% ungroup %>%
  mutate(cl=paste0('Clust',cl,' (',N,')'),time=as.numeric(sub('T','',time)))-> profiles

finally we visualise the results as ‘radar’ plots

ggplot(profiles,aes(x=time,y=prob)) + geom_area(stat='identity',fill='dodgerblue') + 
  facet_wrap(~cl) + coord_polar() +
  scale_x_continuous(breaks=seq(0,1800,by=600),labels=c("Midnight","6am","Noon","6pm"))

Weekly Cycles

crime_ss %>%  
  group_by(whour,MB_CODE16) %>% 
  summarise(count=n()) %>% 
  arrange(MB_CODE16,whour) -> crime_ssw

table(crime_ssw$MB_CODE16,crime_ssw$whour) -> occ_mat
colnames(occ_mat) <- paste0("H",0:167)
cl <- pam(occ_mat,4,metric='manhattan')
occ_mat %>% as_tibble %>% transmute(mb=Var1,time=Var2,n=n) %>% arrange(mb) -> occ_tab
tibble(mb=row.names(occ_mat),cl=cl$clustering) -> clusters
occ_tab %>% left_join(clusters) %>% group_by(time,cl) %>% summarise(prob=mean(n),N=n()) %>% ungroup %>%
  mutate(cl=paste0('Clust',cl,' (',N,')'),time=as.numeric(sub('H','',time)))-> profiles
ggplot(profiles,aes(x=time,y=prob)) + geom_line(stat='identity',col='dodgerblue') + facet_wrap(~cl) + 
  coord_polar() + scale_x_continuous(breaks=(0:6)*24,labels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat")) 

Geography of clusters

st_centroid(mesh) %>% 
  right_join(clusters %>% transmute(MB_CODE16=mb,cluster=paste0('Cluster ',cl))) -> comclus
tm_shape(comclus) + tm_dots(col='cluster')

Smoothing Spline (Cyclic)

Firstly, cyclic splines are fitted. These have a roughness penalty, and satisfy \(f(x) = f(x+c)\), \(f'(x) = f'(x+c)\) and \(f''(x) = f''(x+c)\) where \(c=\) 24 \(\times\) 7 = 168 hours.

intense <- data.frame(time=NULL,intensity=NULL,se=NULL,cluster=NULL)
for (i in 1:4) occ_tab %>% left_join(clusters) %>% filter(cl==i) %>% mutate(time=as.numeric(sub("H","",time))) %>%
  gam(n~s(time,bs='cc'),data=.,knots=list(time=c(0,168))) %>% predict(newdata=data.frame(time=0:168),se.fit=TRUE) %>% 
  data.frame(time=0:168,intensity=.$fit,se=.$se.fit,cluster=i) %>% rbind(intense) -> intense

Next, we view the splines:

ggplot(intense,aes(x=time,y=intensity)) + geom_area(alpha=0.7,fill='salmon') + facet_wrap(~cluster) + 
  coord_polar()  + 
  scale_x_continuous(breaks=(0:6)*24,
                     labels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat")) 

Next, the same with error bands:

intense %>% mutate(hi=intensity+2*se,lo=intensity-2*se) %>% filter(time < 168) -> intense2
ggplot(intense2,aes(x=time,y=intensity,ymin=lo,ymax=hi)) + geom_errorbar(alpha=0.7,col='darkgreen') + 
  geom_point(col='wheat',size=0.05) +
  facet_wrap(~cluster) + coord_polar()  +  
  scale_x_continuous(breaks=(0:6)*24,limits=c(0,168),
                     labels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat")) 

Clustering by Crime Type

Cross tabulate crime counts by mesh block. Note mesh blocks with no crimes are excluded here.

with(crimes,table(MB_CODE16,Crime_grp)) -> crime_profile

Make profiles be proportions rather than absolutes:

sweep(crime_profile,1,rowSums(crime_profile),'/') -> crime_profile

Now do the cluster analysis …

pam(crime_profile,4) -> profile_cluster
profile_cluster$medoid %>% cbind(Count=table(profile_cluster$clustering)) -> meds
knitr::kable(meds,digits=3)
Drug Property Damage Property Theft Public Nuisance Violent Count
30563927900 0.461 0.091 0.266 0.097 0.084 225
30189892000 0.074 0.107 0.254 0.500 0.066 385
30276530000 0.111 0.133 0.533 0.156 0.067 641
30275470000 0.042 0.031 0.881 0.024 0.022 465

… and a map:

profile_cluster$clustering %>% data.frame(MB_CODE16=names(.),cluster=paste0("Cluster ",.)) %>% .[,-1] -> clustlut
st_centroid(mesh) %>% right_join(clustlut) -> clustpts
tm_shape(clustpts) + tm_dots(col='cluster')

Isometric Log Ratio Transformed Data

pam(ilr(crime_profile),4,metric='manhattan') -> profile_cluster_ilr
meds_ilr <- ilrInv(profile_cluster_ilr$medoids)
colnames(meds_ilr) <- colnames(crime_profile)
meds_ilr %>% cbind(Count=table(profile_cluster_ilr$clustering)) -> meds_ilr
knitr::kable(meds_ilr,digits=3)
Drug Property Damage Property Theft Public Nuisance Violent Count
30612870000 0.200 0.200 0.200 0.200 0.200 655
30434654000 0.125 0.062 0.187 0.562 0.062 208
30315981000 0.127 0.127 0.380 0.183 0.183 485
30139231000 0.086 0.086 0.657 0.114 0.057 368

… and a map.

profile_cluster_ilr$clustering %>% data.frame(MB_CODE16=names(.),ilr_cluster=paste0("ilr Cluster ",.)) %>% .[,-1] -> clustlut_ilr
st_centroid(mesh) %>% right_join(clustlut_ilr) -> clustpts_ilr
tm_shape(clustpts_ilr) + tm_dots(col='ilr_cluster')
 clustlut %>% left_join(clustlut_ilr) %>% with(.,table(cluster,ilr_cluster)) %>% kable
ilr Cluster 1 ilr Cluster 2 ilr Cluster 3 ilr Cluster 4
Cluster 1 196 9 13 7
Cluster 2 149 188 44 4
Cluster 3 177 11 281 172
Cluster 4 133 0 147 185