Initial Findings

Getting the information we need

First, read in the data…

oldwd <- getwd()
setwd('MB_2016_QLD')
mesh <- readOGR('.','MB_2016_QLD')
mesh <- st_as_sf(mesh)
setwd(oldwd)

setwd('2016_crimes_ALL_GROUPS_Commerial_final')
crimes <- readOGR('.','2016_crimes_ALL_GROUPS_Commerial_final')
crimes %>% st_as_sf  -> crimes
setwd(oldwd)

Crime Groups

Below is a bar plot showing the relative abundance of the crime types.

ggplot(crimes %>% mutate(Crime_grp=fct_infreq(Crime_grp)),aes(x=Crime_grp)) + geom_bar(fill='indianred')

Geography

These are the locations for the Drug group:

crimes %>% filter(Crime_grp=='Drug') -> drug 
tmap_mode('view')
tm_shape(drug) + tm_dots()

Analysis

Process to get some derived information

crimes %>% 
  filter(Crime_grp=='Drug') %>% 
  mutate(StartDate=ymd_hms(sub('T',' ',StartDate),tz='Australia/Brisbane'),
         hour=hour(StartDate),
         day=wday(StartDate) - 1,
         whour=hour + day*24,
         MB_CODE16=as.character(MB_CODE16)) -> crime_ss 

crime_ss %>%  
  group_by(hour,MB_CODE16) %>% 
  summarise(count=n()) %>% 
  arrange(MB_CODE16,hour) -> crime_ssh

table(crime_ssh$MB_CODE16,crime_ssh$hour) -> occ_mat
colnames(occ_mat) <- paste0("T",0:23,"00")

Cluster Analysis

Then we ran a PAM cluster analysis on the data:

cl <- pam(occ_mat,4,metric='manhattan')

Note

We use a Manhattan distance metric
We used 4 clusters after some exploration

Results

Next we link the clusters back to the mesh blocks and compute the probability of events occuring at each hour for each of the groups of clusters.

occ_mat %>% as_tibble %>% transmute(mb=Var1,time=Var2,n=n) %>% arrange(mb) -> occ_tab
tibble(mb=row.names(occ_mat),cl=cl$clustering) -> clusters
occ_tab %>% left_join(clusters) %>% group_by(time,cl) %>% summarise(prob=mean(n),N=n()) %>% ungroup %>%
  mutate(cl=paste0('Clust',cl,' (',N,')'),time=as.numeric(sub('T','',time)))-> profiles

finally we visualise the results as ‘radar’ plots

ggplot(profiles,aes(x=time,y=prob)) + geom_area(stat='identity',fill='dodgerblue') + 
  facet_wrap(~cl) + coord_polar() +
  scale_x_continuous(breaks=seq(0,1800,by=600),labels=c("Midnight","6am","Noon","6pm"))

Weekly Cycles

crime_ss %>%  
  group_by(whour,MB_CODE16) %>% 
  summarise(count=n()) %>% 
  arrange(MB_CODE16,whour) -> crime_ssw

table(crime_ssw$MB_CODE16,crime_ssw$whour) -> occ_mat
colnames(occ_mat) <- paste0("H",0:167)
cl <- pam(occ_mat,4,metric='manhattan')
occ_mat %>% as_tibble %>% transmute(mb=Var1,time=Var2,n=n) %>% arrange(mb) -> occ_tab
tibble(mb=row.names(occ_mat),cl=cl$clustering) -> clusters
occ_tab %>% left_join(clusters) %>% group_by(time,cl) %>% summarise(prob=mean(n),N=n()) %>% ungroup %>%
  mutate(cl=paste0('Clust',cl,' (',N,')'),time=as.numeric(sub('H','',time)))-> profiles
ggplot(profiles,aes(x=time,y=prob)) + geom_line(stat='identity',col='dodgerblue') + facet_wrap(~cl) + 
  coord_polar() + scale_x_continuous(breaks=(0:6)*24,labels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat"))

Geography of clusters

st_centroid(mesh) %>% 
  right_join(clusters %>% transmute(MB_CODE16=mb,cluster=paste0('Cluster ',cl))) -> comclus
tm_shape(comclus) + tm_dots(col='cluster')

Smoothing Spline (Cyclic)

Firstly, cyclic splines are fitted. These have a roughness penalty, and satisfy \(f(x) = f(x+c)\), \(f'(x) = f'(x+c)\) and \(f''(x) = f''(x+c)\) where \(c=\) 24 \(\times\) 7 = 168 hours.

intense <- data.frame(time=NULL,intensity=NULL,se=NULL,cluster=NULL)
for (i in 1:4) occ_tab %>% left_join(clusters) %>% filter(cl==i) %>% mutate(time=as.numeric(sub("H","",time))) %>%
  gam(n~s(time,bs='cc'),data=.,knots=list(time=c(0,168))) %>% predict(newdata=data.frame(time=0:168),se.fit=TRUE) %>% 
  data.frame(time=0:168,intensity=.$fit,se=.$se.fit,cluster=i) %>% rbind(intense) -> intense

Next, we view the splines:

ggplot(intense,aes(x=time,y=intensity)) + geom_area(alpha=0.7,fill='salmon') + facet_wrap(~cluster) + 
  coord_polar()  + 
  scale_x_continuous(breaks=(0:6)*24,
                     labels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat"))

Next, the same with error bands:

intense %>% mutate(hi=intensity+2*se,lo=intensity-2*se) %>% filter(time < 168) -> intense2
ggplot(intense2,aes(x=time,y=intensity,ymin=lo,ymax=hi)) + geom_errorbar(alpha=0.7,col='darkgreen') + 
  geom_point(col='wheat',size=0.05) +
  facet_wrap(~cluster) + coord_polar()  +  
  scale_x_continuous(breaks=(0:6)*24,limits=c(0,168),
                     labels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat"))

Clustering by Crime Type

Cross tabulate crime counts by mesh block. Note mesh blocks with no crimes are excluded here.

with(crimes,table(MB_CODE16,Crime_grp)) -> crime_profile

Make profiles be proportions rather than absolutes:

sweep(crime_profile,1,rowSums(crime_profile),'/') -> crime_profile

Now do the cluster analysis …

pam(crime_profile,4) -> profile_cluster
profile_cluster$medoid %>% cbind(Count=table(profile_cluster$clustering)) -> meds
knitr::kable(meds,digits=3)

	Drug	Property Damage	Property Theft	Public Nuisance	Violent	Count
30563927900	0.461	0.091	0.266	0.097	0.084	225
30189892000	0.074	0.107	0.254	0.500	0.066	385
30276530000	0.111	0.133	0.533	0.156	0.067	641
30275470000	0.042	0.031	0.881	0.024	0.022	465

… and a map:

profile_cluster$clustering %>% data.frame(MB_CODE16=names(.),cluster=paste0("Cluster ",.)) %>% .[,-1] -> clustlut
st_centroid(mesh) %>% right_join(clustlut) -> clustpts
tm_shape(clustpts) + tm_dots(col='cluster')

Isometric Log Ratio Transformed Data

pam(ilr(crime_profile),4,metric='manhattan') -> profile_cluster_ilr
meds_ilr <- ilrInv(profile_cluster_ilr$medoids)
colnames(meds_ilr) <- colnames(crime_profile)
meds_ilr %>% cbind(Count=table(profile_cluster_ilr$clustering)) -> meds_ilr
knitr::kable(meds_ilr,digits=3)

	Drug	Property Damage	Property Theft	Public Nuisance	Violent	Count
30612870000	0.200	0.200	0.200	0.200	0.200	655
30434654000	0.125	0.062	0.187	0.562	0.062	208
30315981000	0.127	0.127	0.380	0.183	0.183	485
30139231000	0.086	0.086	0.657	0.114	0.057	368

… and a map.

profile_cluster_ilr$clustering %>% data.frame(MB_CODE16=names(.),ilr_cluster=paste0("ilr Cluster ",.)) %>% .[,-1] -> clustlut_ilr
st_centroid(mesh) %>% right_join(clustlut_ilr) -> clustpts_ilr
tm_shape(clustpts_ilr) + tm_dots(col='ilr_cluster')

 clustlut %>% left_join(clustlut_ilr) %>% with(.,table(cluster,ilr_cluster)) %>% kable

	ilr Cluster 1	ilr Cluster 2	ilr Cluster 3	ilr Cluster 4
Cluster 1	196	9	13	7
Cluster 2	149	188	44	4
Cluster 3	177	11	281	172
Cluster 4	133	0	147	185